def get_summary_and_description_of_bug(cls, bug_id):
    """Get the summary and description text of a bug.

    The in-memory cache is consulted first. On a miss the text collection
    is queried and, when caching is enabled, the result is remembered.

    Args:
        bug_id: int

    Returns:
        (str, str), (summary, description). Both are empty strings when
        no document matches bug_id.
    """
    # Answer from the cache before importing anything, so that a cache hit
    # does no config or database work at all.
    if cls.__is_cache and bug_id in cls.__cache_summary_description:
        return cls.__cache_summary_description[bug_id]

    from ir_config import IRConfig
    from ir_mongodb_helper import IRCollection

    config = IRConfig.get_instance()
    bug_id_name = config.get('bug_id_name')
    summary_name = config.get('bug_summary_name')
    description_name = config.get('bug_description_name')
    text_collection = IRCollection(
        'bug_db_name', 'bug_text_collection_name', 'r')
    res = text_collection.find({bug_id_name: bug_id})

    summary = ''
    description = ''
    if res.count() > 0:
        # Index the cursor only once.
        # NOTE(review): with a pymongo cursor every res[0] presumably
        # issues its own query -- confirm against IRCollection.find.
        bug = res[0]
        summary = bug[summary_name]
        description = bug[description_name]
    if cls.__is_cache:
        cls.__cache_summary_description[bug_id] = (summary, description)
    return summary, description
def similarity_over_all(self):
    """Calculate similarity between this bug and the stored reports.

    Candidate reports are read from the basic collection. A candidate must
    have the same product as this report, a create timestamp greater than
    this report's create timestamp minus the search span, and an id that
    is not in the excluded id list.

    Returns:
        dict, {bug_id -> [score, summary_score, description_score,
                          stacktrace_score]}
    """
    from ir_log import IRLog
    from ir_config import IRConfig
    from ir_mongodb_helper import IRCollection
    from ir_text import IRText
    from ir_tfidf import IRTFIDF

    logger = IRLog.get_instance()
    config = IRConfig.get_instance()
    # 2 * 365 days; assumes create timestamps are in seconds -- TODO confirm
    search_time_span = 2 * 3600 * 24 * 365

    bug_id_name = config.get('bug_id_name')
    create_ts_name = config.get('bug_create_ts_name')
    product_name = config.get('bug_product_name')
    basic_collection = IRCollection(
        'bug_db_name', 'bug_basic_collection_name', 'r')
    reports2scan = basic_collection.find({
        product_name: self.get_product(),
        create_ts_name: {'$gt': self.get_create_ts() - search_time_span},
        bug_id_name: {'$nin': self.__exclude_report_ids},
    })
    logger.println('Comparing with %d reports.' % reports2scan.count())
    # Single-argument print calls behave the same on Python 2 and 3.
    print(self.__summary_text)
    print(self.__description_text)

    # The candidates' stack traces are only needed when this report has a
    # non-empty one, so decide that once instead of on every iteration.
    need_stacktrace = bool(self.get_stacktrace())

    result = {}
    for report in reports2scan:
        bug_id = report[bug_id_name]
        if bug_id == self.get_dummy_bug_id():
            continue
        # Because we don't want to load the stacktrace when this report
        # has none, we create the other report empty and fill it manually.
        other_report = IRReport("", "")
        other_report.__summary_tfidf, other_report.__description_tfidf = \
            IRTFIDF.get_tfidf_of_bug(bug_id)
        if need_stacktrace:
            other_report.__stacktrace = IRText.get_stacktrace_of_bug(bug_id)
        if other_report.__stacktrace is None:
            other_report.__stacktrace = []
        result[bug_id] = self.similarity_with(other_report)
    return result
def get_summary_and_description_of_bug(cls, bug_id):
    """Get the summary and description text of a bug.

    The in-memory cache is consulted first. On a miss the text collection
    is queried and, when caching is enabled, the result is remembered.

    Args:
        bug_id: int

    Returns:
        (str, str), (summary, description). Both are empty strings when
        no document matches bug_id.
    """
    # Answer from the cache before importing anything, so that a cache hit
    # does no config or database work at all.
    if cls.__is_cache and bug_id in cls.__cache_summary_description:
        return cls.__cache_summary_description[bug_id]

    from ir_config import IRConfig
    from ir_mongodb_helper import IRCollection

    config = IRConfig.get_instance()
    bug_id_name = config.get('bug_id_name')
    summary_name = config.get('bug_summary_name')
    description_name = config.get('bug_description_name')
    text_collection = IRCollection(
        'bug_db_name', 'bug_text_collection_name', 'r')
    res = text_collection.find({bug_id_name: bug_id})

    summary = ''
    description = ''
    if res.count() > 0:
        # Index the cursor only once.
        # NOTE(review): with a pymongo cursor every res[0] presumably
        # issues its own query -- confirm against IRCollection.find.
        bug = res[0]
        summary = bug[summary_name]
        description = bug[description_name]
    if cls.__is_cache:
        cls.__cache_summary_description[bug_id] = (summary, description)
    return summary, description
def cache_all_data(cls):
    """Load the text data of every bug into memory.

    Turns caching on, resets both text caches and fills them from every
    document of the text collection: (summary, description) per bug id in
    one cache, the stacktrace per bug id in the other.
    """
    from ir_log import IRProgressBar
    from ir_config import IRConfig
    from ir_mongodb_helper import IRCollection

    # get config
    config = IRConfig.get_instance()
    bug_id_name = config.get('bug_id_name')
    summary_name = config.get('bug_summary_name')
    description_name = config.get('bug_description_name')
    stacktrace_name = config.get('bug_stacktrace_name')

    # caching data
    cls.set_is_cache(True)
    text_collection = IRCollection(
        'bug_db_name', 'bug_text_collection_name', 'r')
    cls.__cache_summary_description = {}
    cls.__cache_stacktrace = {}

    def iter_func(bug):
        bug_id = bug[bug_id_name]
        cls.__cache_summary_description[bug_id] = \
            (bug[summary_name], bug[description_name])
        cls.__cache_stacktrace[bug_id] = bug[stacktrace_name]

    # Close the collection even when the iteration raises (for example on
    # a document that lacks one of the expected fields).
    try:
        IRProgressBar.execute_iteration_for_cursor(
            text_collection.find(), iter_func, 'Caching Text Data')
    finally:
        text_collection.close()
def get_stacktrace_of_bug(cls, bug_id):
    """Fetch the stacktrace of a bug, preferring the in-memory cache.

    Args:
        bug_id: int

    Returns:
        [[str]], [[signature]]. An empty list when no document in the
        text collection matches bug_id.
    """
    if cls.__is_cache and bug_id in cls.__cache_stacktrace:
        return cls.__cache_stacktrace[bug_id]

    # Cache miss (or caching disabled): read from the database.
    from ir_config import IRConfig
    from ir_mongodb_helper import IRCollection

    id_field = IRConfig.get_instance().get('bug_id_name')
    stacktrace_field = IRConfig.get_instance().get('bug_stacktrace_name')
    collection = IRCollection(
        'bug_db_name', 'bug_text_collection_name', 'r')
    matches = collection.find({id_field: bug_id})

    if matches.count() > 0:
        trace = matches[0][stacktrace_field]
    else:
        trace = []

    if cls.__is_cache:
        cls.__cache_stacktrace[bug_id] = trace
    return trace
def get_tfidf_of_bug(cls, bug_id):
    """Get the TFIDF of a bug.

    The in-memory cache is consulted first. On a miss the tfidf collection
    is queried and, when caching is enabled, the result is remembered.

    Args:
        bug_id: int

    Returns:
        (dict, dict), (TFIDF of summary, TFIDF of description). Both are
        empty dicts when no document matches bug_id.
    """
    if cls.__is_cache and bug_id in cls.__cache:
        return cls.__cache[bug_id]

    # load from db
    from ir_config import IRConfig
    from ir_mongodb_helper import IRCollection

    config = IRConfig.get_instance()
    bug_id_name = config.get('bug_id_name')
    summary_name = config.get('bug_summary_name')
    description_name = config.get('bug_description_name')
    tfidf_collection = IRCollection(
        'bug_db_name', 'bug_tfidf_collection_name', 'r')
    find_result = tfidf_collection.find({bug_id_name: bug_id})

    summary = {}
    description = {}
    if find_result.count() > 0:
        # Index the cursor only once.
        # NOTE(review): with a pymongo cursor every find_result[0]
        # presumably issues its own query -- confirm.
        bug = find_result[0]
        summary = bug[summary_name]
        description = bug[description_name]
    if cls.__is_cache:
        cls.__cache[bug_id] = (summary, description)
    return summary, description
def cache_all_data(cls):
    """Read every document count from the database into the cache.

    Turns caching on and stores, for each term document, the pair
    (summary count, description count). A count missing from the
    document is stored as 0.
    """
    from ir_log import IRProgressBar
    from ir_config import IRConfig
    from ir_mongodb_helper import IRCollection

    # config
    term_field = IRConfig.get_instance().get('bug_term_name')
    summary_field = IRConfig.get_instance().get('bug_summary_name')
    description_field = IRConfig.get_instance().get('bug_description_name')

    cls.__is_cache = True
    collection = IRCollection(
        'bug_db_name', 'bug_documentcount_collection_name', 'r')

    def count_or_zero(document, field):
        # A field that is absent from the document counts as zero.
        if field in document:
            return document[field]
        return 0

    def iter_document_count(term):
        summary_count = count_or_zero(term, summary_field)
        description_count = count_or_zero(term, description_field)
        cls.__cache_document_count[term[term_field]] = \
            (summary_count, description_count)

    IRProgressBar.execute_iteration_for_cursor(
        collection.find({}), iter_document_count, "Caching Document Count")
def get_tfidf_of_bug(cls, bug_id):
    """Get the TFIDF of a bug.

    The in-memory cache is consulted first. On a miss the tfidf collection
    is queried and, when caching is enabled, the result is remembered.

    Args:
        bug_id: int

    Returns:
        (dict, dict), (TFIDF of summary, TFIDF of description). Both are
        empty dicts when no document matches bug_id.
    """
    if cls.__is_cache and bug_id in cls.__cache:
        return cls.__cache[bug_id]

    # load from db
    from ir_config import IRConfig
    from ir_mongodb_helper import IRCollection

    config = IRConfig.get_instance()
    bug_id_name = config.get('bug_id_name')
    summary_name = config.get('bug_summary_name')
    description_name = config.get('bug_description_name')
    tfidf_collection = IRCollection(
        'bug_db_name', 'bug_tfidf_collection_name', 'r')
    find_result = tfidf_collection.find({bug_id_name: bug_id})

    summary = {}
    description = {}
    if find_result.count() > 0:
        # Index the cursor only once.
        # NOTE(review): with a pymongo cursor every find_result[0]
        # presumably issues its own query -- confirm.
        bug = find_result[0]
        summary = bug[summary_name]
        description = bug[description_name]
    if cls.__is_cache:
        cls.__cache[bug_id] = (summary, description)
    return summary, description
def get_termcount_of_bug(cls, bug_id):
    """Get the term count of a bug.

    The in-memory cache is consulted first. On a miss the termcount
    collection is queried and, when caching is enabled, the result is
    remembered.

    Args:
        bug_id: int

    Returns:
        (dict, dict), (termcount of summary, termcount of description).
        Both are empty dicts when no document matches bug_id.
    """
    # Answer from the cache before importing anything, so that a cache hit
    # does no config or database work at all.
    if cls.__is_cache and bug_id in cls.__cache_term_count:
        return cls.__cache_term_count[bug_id]

    from ir_config import IRConfig
    from ir_mongodb_helper import IRCollection

    config = IRConfig.get_instance()
    bug_id_name = config.get('bug_id_name')
    summary_name = config.get('bug_summary_name')
    description_name = config.get('bug_description_name')
    termcount_collection = IRCollection(
        'bug_db_name', 'bug_termcount_collection_name', 'r')
    res = termcount_collection.find({bug_id_name: bug_id})

    summary = {}
    description = {}
    if res.count() > 0:
        # Index the cursor only once.
        # NOTE(review): with a pymongo cursor every res[0] presumably
        # issues its own query -- confirm.
        bug = res[0]
        summary = bug[summary_name]
        description = bug[description_name]
    if cls.__is_cache:
        cls.__cache_term_count[bug_id] = (summary, description)
    return summary, description
def get_stacktrace_of_bug(cls, bug_id):
    """Fetch the stacktrace of a bug, preferring the in-memory cache.

    Args:
        bug_id: int

    Returns:
        [[str]], [[signature]]. An empty list when no document in the
        text collection matches bug_id.
    """
    if cls.__is_cache and bug_id in cls.__cache_stacktrace:
        return cls.__cache_stacktrace[bug_id]

    # Cache miss (or caching disabled): read from the database.
    from ir_config import IRConfig
    from ir_mongodb_helper import IRCollection

    id_field = IRConfig.get_instance().get('bug_id_name')
    stacktrace_field = IRConfig.get_instance().get('bug_stacktrace_name')
    collection = IRCollection(
        'bug_db_name', 'bug_text_collection_name', 'r')
    matches = collection.find({id_field: bug_id})

    if matches.count() > 0:
        trace = matches[0][stacktrace_field]
    else:
        trace = []

    if cls.__is_cache:
        cls.__cache_stacktrace[bug_id] = trace
    return trace
def show_distribution_on_product_and_create_ts(cls):
    """Show the create-time span and product count of each duplicate group.

    For every distinct group id in the duplicate collection, the basic
    info of each member bug is looked up and one line is logged with the
    difference between the largest and smallest create timestamp and the
    number of distinct products. Bugs without basic info are skipped.
    """
    from ir_log import IRLog
    from ir_log import IRProgressBar
    from ir_config import IRConfig
    from ir_mongodb_helper import IRCollection

    bug2group_collection = IRCollection(
        'bug_db_name', 'bug_duplicate_collection_name', 'r')
    basic_collection = IRCollection(
        'bug_db_name', 'bug_basic_collection_name', 'r')
    config = IRConfig.get_instance()
    bug_id_name = config.get('bug_id_name')
    group_name = config.get('bug_group_name')
    product_name = config.get('bug_product_name')
    create_ts_name = config.get('bug_create_ts_name')

    group_ids = bug2group_collection.distinct(group_name)
    progress_bar = IRProgressBar(len(group_ids), "group", False, 0, 1)
    for group_num, group_id in enumerate(group_ids, 1):
        progress_bar.set_value(group_num)
        # Sentinels: a group in which no bug has basic info keeps them,
        # so it is reported with a large negative span and 0 products.
        min_ts = 9999999999
        max_ts = -1000
        product_set = set()
        for bug in bug2group_collection.find({group_name: group_id}):
            basic = basic_collection.find({bug_id_name: bug[bug_id_name]})
            if basic.count() == 0:
                continue
            # Index the cursor once and reuse the document for both fields.
            info = basic[0]
            ts = info[create_ts_name]
            max_ts = max(max_ts, ts)
            min_ts = min(min_ts, ts)
            product_set.add(info[product_name])
        IRLog.get_instance().println(
            'ts span:%d;product number:%d'
            % (max_ts - min_ts, len(product_set)), 2)
def show_distribution_on_product_and_create_ts(cls):
    """Show the create-time span and product count of each duplicate group.

    For every distinct group id in the duplicate collection, the basic
    info of each member bug is looked up and one line is logged with the
    difference between the largest and smallest create timestamp and the
    number of distinct products. Bugs without basic info are skipped.
    """
    from ir_log import IRLog
    from ir_log import IRProgressBar
    from ir_config import IRConfig
    from ir_mongodb_helper import IRCollection

    bug2group_collection = IRCollection(
        'bug_db_name', 'bug_duplicate_collection_name', 'r')
    basic_collection = IRCollection(
        'bug_db_name', 'bug_basic_collection_name', 'r')
    config = IRConfig.get_instance()
    bug_id_name = config.get('bug_id_name')
    group_name = config.get('bug_group_name')
    product_name = config.get('bug_product_name')
    create_ts_name = config.get('bug_create_ts_name')

    group_ids = bug2group_collection.distinct(group_name)
    progress_bar = IRProgressBar(len(group_ids), "group", False, 0, 1)
    for group_num, group_id in enumerate(group_ids, 1):
        progress_bar.set_value(group_num)
        # Sentinels: a group in which no bug has basic info keeps them,
        # so it is reported with a large negative span and 0 products.
        min_ts = 9999999999
        max_ts = -1000
        product_set = set()
        for bug in bug2group_collection.find({group_name: group_id}):
            basic = basic_collection.find({bug_id_name: bug[bug_id_name]})
            if basic.count() == 0:
                continue
            # Index the cursor once and reuse the document for both fields.
            info = basic[0]
            ts = info[create_ts_name]
            max_ts = max(max_ts, ts)
            min_ts = min(min_ts, ts)
            product_set.add(info[product_name])
        IRLog.get_instance().println(
            'ts span:%d;product number:%d'
            % (max_ts - min_ts, len(product_set)), 2)
def get_documentcount(cls, term, field=None, documentcount_collection=None):
    """Get the document count of a term.

    The in-memory cache is consulted first. On a miss the document count
    collection is queried and, when caching is enabled, the result is
    remembered. A count that is not stored for the term is 0.

    Args:
        term: str
        field: str or None, the configured summary or description field
            name, or None to get both counts.
        documentcount_collection: collection to query on a cache miss;
            when None a read-only one is opened.

    Returns:
        if field == None:
            (int, int), (summary document count,
                         description document count)
        else:
            int, the document count of the corresponding field; 0 when
            field is neither the summary nor the description field name.
    """
    cached = cls.__is_cache and term in cls.__cache_document_count
    if cached and field is None:
        # Fast path: nothing has to be imported or looked up.
        return cls.__cache_document_count[term]

    from ir_config import IRConfig
    config = IRConfig.get_instance()
    summary_name = config.get('bug_summary_name')
    description_name = config.get('bug_description_name')

    if cached:
        counts = cls.__cache_document_count[term]
    else:
        # load from db
        from ir_mongodb_helper import IRCollection
        term_name = config.get('bug_term_name')
        if documentcount_collection is None:
            documentcount_collection = IRCollection(
                'bug_db_name', 'bug_documentcount_collection_name', 'r')
        res = documentcount_collection.find({term_name: term})
        summary = 0
        description = 0
        if res.count() > 0:
            # Index the cursor once and reuse the document.
            document = res[0]
            if summary_name in document:
                summary = document[summary_name]
            if description_name in document:
                description = document[description_name]
        counts = (summary, description)
        if cls.__is_cache:
            cls.__cache_document_count[term] = counts

    # Shared field selection for the cached and the database path.
    if field is None:
        return counts
    if field == summary_name:
        return counts[0]
    if field == description_name:
        return counts[1]
    return 0
def similarity_over_all(self):
    """Calculate similarity between this bug and the stored reports.

    Candidate reports are read from the basic collection. A candidate must
    have the same product as this report, a create timestamp greater than
    this report's create timestamp minus the search span, and an id that
    is not in the excluded id list.

    Returns:
        dict, {bug_id -> [score, summary_score, description_score,
                          stacktrace_score]}
    """
    from ir_log import IRLog
    from ir_config import IRConfig
    from ir_mongodb_helper import IRCollection
    from ir_text import IRText
    from ir_tfidf import IRTFIDF

    logger = IRLog.get_instance()
    config = IRConfig.get_instance()
    # 2 * 365 days; assumes create timestamps are in seconds -- TODO confirm
    search_time_span = 2 * 3600 * 24 * 365

    bug_id_name = config.get('bug_id_name')
    create_ts_name = config.get('bug_create_ts_name')
    product_name = config.get('bug_product_name')
    basic_collection = IRCollection(
        'bug_db_name', 'bug_basic_collection_name', 'r')
    reports2scan = basic_collection.find({
        product_name: self.get_product(),
        create_ts_name: {'$gt': self.get_create_ts() - search_time_span},
        bug_id_name: {'$nin': self.__exclude_report_ids},
    })
    logger.println('Comparing with %d reports.' % reports2scan.count())
    # Single-argument print calls behave the same on Python 2 and 3.
    print(self.__summary_text)
    print(self.__description_text)

    # The candidates' stack traces are only needed when this report has a
    # non-empty one, so decide that once instead of on every iteration.
    need_stacktrace = bool(self.get_stacktrace())

    result = {}
    for report in reports2scan:
        bug_id = report[bug_id_name]
        if bug_id == self.get_dummy_bug_id():
            continue
        # Because we don't want to load the stacktrace when this report
        # has none, we create the other report empty and fill it manually.
        other_report = IRReport("", "")
        other_report.__summary_tfidf, other_report.__description_tfidf = \
            IRTFIDF.get_tfidf_of_bug(bug_id)
        if need_stacktrace:
            other_report.__stacktrace = IRText.get_stacktrace_of_bug(bug_id)
        if other_report.__stacktrace is None:
            other_report.__stacktrace = []
        result[bug_id] = self.similarity_with(other_report)
    return result
def get_documentcount(cls, term, field=None, documentcount_collection=None):
    """Get the document count of a term.

    The in-memory cache is consulted first. On a miss the document count
    collection is queried and, when caching is enabled, the result is
    remembered. A count that is not stored for the term is 0.

    Args:
        term: str
        field: str or None, the configured summary or description field
            name, or None to get both counts.
        documentcount_collection: collection to query on a cache miss;
            when None a read-only one is opened.

    Returns:
        if field == None:
            (int, int), (summary document count,
                         description document count)
        else:
            int, the document count of the corresponding field; 0 when
            field is neither the summary nor the description field name.
    """
    cached = cls.__is_cache and term in cls.__cache_document_count
    if cached and field is None:
        # Fast path: nothing has to be imported or looked up.
        return cls.__cache_document_count[term]

    from ir_config import IRConfig
    config = IRConfig.get_instance()
    summary_name = config.get('bug_summary_name')
    description_name = config.get('bug_description_name')

    if cached:
        counts = cls.__cache_document_count[term]
    else:
        # load from db
        from ir_mongodb_helper import IRCollection
        term_name = config.get('bug_term_name')
        if documentcount_collection is None:
            documentcount_collection = IRCollection(
                'bug_db_name', 'bug_documentcount_collection_name', 'r')
        res = documentcount_collection.find({term_name: term})
        summary = 0
        description = 0
        if res.count() > 0:
            # Index the cursor once and reuse the document.
            document = res[0]
            if summary_name in document:
                summary = document[summary_name]
            if description_name in document:
                description = document[description_name]
        counts = (summary, description)
        if cls.__is_cache:
            cls.__cache_document_count[term] = counts

    # Shared field selection for the cached and the database path.
    if field is None:
        return counts
    if field == summary_name:
        return counts[0]
    if field == description_name:
        return counts[1]
    return 0
def get_iterator(cls, arg):
    """Return a cursor over the text documents that fulfil a condition.

    Args:
        arg: dict, the query condition handed to find().

    Returns:
        cursor
    """
    from ir_mongodb_helper import IRCollection

    collection = IRCollection(
        'bug_db_name', 'bug_text_collection_name', 'r')
    cursor = collection.find(arg)
    return cursor
def get_iterator(cls, arg):
    """Return a cursor over the text documents that fulfil a condition.

    Args:
        arg: dict, the query condition handed to find().

    Returns:
        cursor
    """
    from ir_mongodb_helper import IRCollection

    collection = IRCollection(
        'bug_db_name', 'bug_text_collection_name', 'r')
    cursor = collection.find(arg)
    return cursor
def get_iterator(cls, arg=None):
    """Return a cursor over the termcounts that fulfil a condition.

    Args:
        arg: dict, the query condition. A missing or empty condition
            becomes an empty dict.

    Returns:
        cursor
    """
    from ir_mongodb_helper import IRCollection

    condition = arg if arg else {}
    collection = IRCollection(
        'bug_db_name', 'bug_termcount_collection_name', 'r')
    return collection.find(condition)
def cache_all_data(cls):
    """Read the TFIDF of every bug from the database into the cache.

    Turns caching on, resets the cache and stores, per bug id, the pair
    (summary TFIDF, description TFIDF) of every document in the tfidf
    collection.
    """
    from ir_log import IRProgressBar
    from ir_config import IRConfig
    from ir_mongodb_helper import IRCollection

    id_field = IRConfig.get_instance().get('bug_id_name')
    summary_field = IRConfig.get_instance().get('bug_summary_name')
    description_field = IRConfig.get_instance().get('bug_description_name')
    collection = IRCollection(
        'bug_db_name', 'bug_tfidf_collection_name', 'r')

    cls.set_is_cache(True)
    cls.__cache = {}

    def iter_tfidf(bug):
        # One cache entry per document, keyed by its bug id.
        entry = (bug[summary_field], bug[description_field])
        cls.__cache[bug[id_field]] = entry

    cursor = collection.find()
    IRProgressBar.execute_iteration_for_cursor(
        cursor, iter_tfidf, "Caching TFIDF")
def test_parse_info_level0(self):
    """Check that parse_info_level0 leaves no entry for bug 102500.

    Parses the level-0 test data and asserts that the duplicate
    collection holds no document for the incomplete bug 102500.
    """
    from ir_log import IRLog
    from ir_config import IRConfig
    from ir_mongodb_helper import IRCollection
    from ir_duplicate_group import IRDuplicateGroup

    IRLog.get_instance().start_log()
    # Stop the log even when parsing or an assertion fails, so a failing
    # test does not leave the log started.
    try:
        IRConfig.get_instance().load('../data/test/bug_test.cfg')
        duplicate_group = IRDuplicateGroup()
        duplicate_group.parse_info_level0('../data/test/info_level0_test')
        # test if incomplete bugs have been removed
        bug2group = IRCollection(
            'bug_db_name', 'bug_duplicate_collection_name', 'r')
        assert bug2group is not None
        res = bug2group.find({'bug_id': 102500})
        assert res.count() == 0
    finally:
        IRLog.get_instance().stop_log()
def test_parse_info_level0(self):
    """Check that parse_info_level0 leaves no entry for bug 102500.

    Parses the level-0 test data and asserts that the duplicate
    collection holds no document for the incomplete bug 102500.
    """
    from ir_log import IRLog
    from ir_config import IRConfig
    from ir_mongodb_helper import IRCollection
    from ir_duplicate_group import IRDuplicateGroup

    IRLog.get_instance().start_log()
    # Stop the log even when parsing or an assertion fails, so a failing
    # test does not leave the log started.
    try:
        IRConfig.get_instance().load('../data/test/bug_test.cfg')
        duplicate_group = IRDuplicateGroup()
        duplicate_group.parse_info_level0('../data/test/info_level0_test')
        # test if incomplete bugs have been removed
        bug2group = IRCollection(
            'bug_db_name', 'bug_duplicate_collection_name', 'r')
        assert bug2group is not None
        res = bug2group.find({'bug_id': 102500})
        assert res.count() == 0
    finally:
        IRLog.get_instance().stop_log()
def cache_all_data(cls):
    """Read the TFIDF of every bug from the database into the cache.

    Turns caching on, resets the cache and stores, per bug id, the pair
    (summary TFIDF, description TFIDF) of every document in the tfidf
    collection.
    """
    from ir_log import IRProgressBar
    from ir_config import IRConfig
    from ir_mongodb_helper import IRCollection

    id_field = IRConfig.get_instance().get('bug_id_name')
    summary_field = IRConfig.get_instance().get('bug_summary_name')
    description_field = IRConfig.get_instance().get('bug_description_name')
    collection = IRCollection(
        'bug_db_name', 'bug_tfidf_collection_name', 'r')

    cls.set_is_cache(True)
    cls.__cache = {}

    def iter_tfidf(bug):
        # One cache entry per document, keyed by its bug id.
        entry = (bug[summary_field], bug[description_field])
        cls.__cache[bug[id_field]] = entry

    cursor = collection.find()
    IRProgressBar.execute_iteration_for_cursor(
        cursor, iter_tfidf, "Caching TFIDF")
def get_bugs_in_group(cls, group_id):
    """List the ids of the bugs that belong to a duplicate group.

    Args:
        group_id: int

    Returns:
        [int], [bug_id]
    """
    from ir_config import IRConfig
    from ir_mongodb_helper import IRCollection

    collection = IRCollection(
        'bug_db_name', 'bug_duplicate_collection_name', 'r')
    id_field = IRConfig.get_instance().get('bug_id_name')
    group_field = IRConfig.get_instance().get('bug_group_name')

    bug_ids = []
    for member in collection.find({group_field: group_id}):
        bug_ids.append(member[id_field])
    return bug_ids
def get_bugs_in_group(cls, group_id):
    """List the ids of the bugs that belong to a duplicate group.

    Args:
        group_id: int

    Returns:
        [int], [bug_id]
    """
    from ir_config import IRConfig
    from ir_mongodb_helper import IRCollection

    collection = IRCollection(
        'bug_db_name', 'bug_duplicate_collection_name', 'r')
    id_field = IRConfig.get_instance().get('bug_id_name')
    group_field = IRConfig.get_instance().get('bug_group_name')

    bug_ids = []
    for member in collection.find({group_field: group_id}):
        bug_ids.append(member[id_field])
    return bug_ids
def get_basic_info_of_bug(cls, bug_id):
    """Get the basic info of a bug from mongodb.

    Args:
        bug_id: int

    Returns:
        (int, str): (create_ts, product). (-1, '') when no document in
        the basic collection matches bug_id.
    """
    from ir_config import IRConfig
    from ir_mongodb_helper import IRCollection

    config = IRConfig.get_instance()
    bug_id_name = config.get('bug_id_name')
    create_ts_name = config.get('bug_create_ts_name')
    product_name = config.get('bug_product_name')
    basic_collection = IRCollection(
        'bug_db_name', 'bug_basic_collection_name', 'r')
    res = basic_collection.find({bug_id_name: bug_id})
    if res.count() == 0:
        return -1, ''
    # Index the cursor once and read both fields from the same document.
    # NOTE(review): with a pymongo cursor every res[0] presumably issues
    # its own query -- confirm.
    basic = res[0]
    return basic[create_ts_name], basic[product_name]
def get_basic_info_of_bug(cls, bug_id):
    """Get the basic info of a bug from mongodb.

    Args:
        bug_id: int

    Returns:
        (int, str): (create_ts, product). (-1, '') when no document in
        the basic collection matches bug_id.
    """
    from ir_config import IRConfig
    from ir_mongodb_helper import IRCollection

    config = IRConfig.get_instance()
    bug_id_name = config.get('bug_id_name')
    create_ts_name = config.get('bug_create_ts_name')
    product_name = config.get('bug_product_name')
    basic_collection = IRCollection(
        'bug_db_name', 'bug_basic_collection_name', 'r')
    res = basic_collection.find({bug_id_name: bug_id})
    if res.count() == 0:
        return -1, ''
    # Index the cursor once and read both fields from the same document.
    # NOTE(review): with a pymongo cursor every res[0] presumably issues
    # its own query -- confirm.
    basic = res[0]
    return basic[create_ts_name], basic[product_name]
def get_duplicate_group_information(cls, group_size_min, group_size_max):
    """Get the ids of the duplicate groups whose size is within a range.

    The query uses "$gt" and "$lt", so both bounds are exclusive: a group
    whose size equals group_size_min or group_size_max is not returned.

    Args:
        group_size_min: int, exclusive lower bound of the group size.
        group_size_max: int, exclusive upper bound of the group size.

    Returns:
        [int], [group_id]
    """
    from ir_config import IRConfig
    from ir_mongodb_helper import IRCollection

    duplicate_group_count_collection = IRCollection(
        'bug_db_name', 'bug_duplicate_group_count_collection_name', 'r')
    config = IRConfig.get_instance()
    group_name = config.get('bug_group_name')
    group_size_name = config.get('bug_group_size')
    result = duplicate_group_count_collection.find({
        group_size_name: {"$gt": group_size_min, "$lt": group_size_max},
    })
    return [group[group_name] for group in result]
def cache_all_data(cls):
    """Load the text data of every bug into memory.

    Turns caching on, resets both text caches and fills them from every
    document of the text collection: (summary, description) per bug id in
    one cache, the stacktrace per bug id in the other.
    """
    from ir_log import IRProgressBar
    from ir_config import IRConfig
    from ir_mongodb_helper import IRCollection

    # get config
    config = IRConfig.get_instance()
    bug_id_name = config.get('bug_id_name')
    summary_name = config.get('bug_summary_name')
    description_name = config.get('bug_description_name')
    stacktrace_name = config.get('bug_stacktrace_name')

    # caching data
    cls.set_is_cache(True)
    text_collection = IRCollection(
        'bug_db_name', 'bug_text_collection_name', 'r')
    cls.__cache_summary_description = {}
    cls.__cache_stacktrace = {}

    def iter_func(bug):
        bug_id = bug[bug_id_name]
        cls.__cache_summary_description[bug_id] = \
            (bug[summary_name], bug[description_name])
        cls.__cache_stacktrace[bug_id] = bug[stacktrace_name]

    # Close the collection even when the iteration raises (for example on
    # a document that lacks one of the expected fields).
    try:
        IRProgressBar.execute_iteration_for_cursor(
            text_collection.find(), iter_func, 'Caching Text Data')
    finally:
        text_collection.close()
def cache_all_data(cls):
    """Read every document count from the database into the cache.

    Turns caching on and stores, for each term document, the pair
    (summary count, description count). A count missing from the
    document is stored as 0.
    """
    from ir_log import IRProgressBar
    from ir_config import IRConfig
    from ir_mongodb_helper import IRCollection

    # config
    term_field = IRConfig.get_instance().get('bug_term_name')
    summary_field = IRConfig.get_instance().get('bug_summary_name')
    description_field = IRConfig.get_instance().get('bug_description_name')

    cls.__is_cache = True
    collection = IRCollection(
        'bug_db_name', 'bug_documentcount_collection_name', 'r')

    def count_or_zero(document, field):
        # A field that is absent from the document counts as zero.
        if field in document:
            return document[field]
        return 0

    def iter_document_count(term):
        summary_count = count_or_zero(term, summary_field)
        description_count = count_or_zero(term, description_field)
        cls.__cache_document_count[term[term_field]] = \
            (summary_count, description_count)

    IRProgressBar.execute_iteration_for_cursor(
        collection.find({}), iter_document_count, "Caching Document Count")
def get_group_of_bug(cls, bug_id):
    """Look up the duplicate group that a bug belongs to.

    Args:
        bug_id: int

    Returns:
        int, group_id; None when the duplicate collection has no
        document for bug_id.
    """
    from ir_config import IRConfig
    from ir_mongodb_helper import IRCollection

    collection = IRCollection(
        'bug_db_name', 'bug_duplicate_collection_name', 'r')
    id_field = IRConfig.get_instance().get('bug_id_name')
    group_field = IRConfig.get_instance().get('bug_group_name')
    matches = collection.find({id_field: bug_id})

    # Guard clause: unknown bugs have no group.
    if matches.count() == 0:
        return None
    return matches[0][group_field]
def get_group_of_bug(cls, bug_id):
    """Look up the duplicate group that a bug belongs to.

    Args:
        bug_id: int

    Returns:
        int, group_id; None when the duplicate collection has no
        document for bug_id.
    """
    from ir_config import IRConfig
    from ir_mongodb_helper import IRCollection

    collection = IRCollection(
        'bug_db_name', 'bug_duplicate_collection_name', 'r')
    id_field = IRConfig.get_instance().get('bug_id_name')
    group_field = IRConfig.get_instance().get('bug_group_name')
    matches = collection.find({id_field: bug_id})

    # Guard clause: unknown bugs have no group.
    if matches.count() == 0:
        return None
    return matches[0][group_field]
if __name__ == '__main__': import sys from ir_config import IRConfig from ir_text import IRText from ir_mongodb_helper import IRCollection config = IRConfig.get_instance() config.load(sys.argv[1]) product_name = config.get('bug_product_name') products = dict() basic = IRCollection('bug_db_name', 'bug_basic_collection_name', 'r') cursor = basic.find(None) for bug in cursor: product = bug[product_name] if product not in products: products[product] = 0 products[product] += 1 product_list = products.items() product_list.sort(cmp=lambda x,y:cmp(x[1],y[1]), reverse=True) prefix = '' if sys.argv.__len__() < 3 else sys.argv[2] surfix = '' if sys.argv.__len__() < 4 else sys.argv[3] threshold = 100 if sys.argv.__len__() <5 else int(sys.argv[4]) for product in product_list: if product[1] < threshold: break