def test_problem_json(self):
    """
    Fetch a problem as JSON and check that its probable fix is reported.

    A kernel build is attached to problem #1 as the probable fix for
    Fedora 20, then the problem page is requested with a JSON Accept
    header. The response must carry exactly one solution whose note text
    mentions the build's version-release string.
    """
    problem = get_problem_by_id(self.db, 1)

    # Build that is going to be advertised as the probable fix.
    fix_build = Build()
    fix_build.base_package_name = "kernel"
    fix_build.epoch = 0
    fix_build.version = "3.12.10"
    fix_build.release = "301.fc20"
    self.db.session.add(fix_build)

    # Link the build to the problem for the Fedora 20 release.
    problem_release = ProblemOpSysRelease()
    problem_release.opsysrelease = get_releases(self.db, 'Fedora', '20').first()
    problem_release.probable_fix_build = fix_build
    problem_release.problem = problem
    self.db.session.add(problem_release)

    problem.reports[0].max_certainty = 99
    self.db.session.commit()

    json_headers = [('Accept', 'application/json')]
    response = self.app.get('/problems/%d/' % problem.id,
                            headers=json_headers)
    self.assertEqual(response.mimetype, "application/json")

    payload = json.loads(response.get_data(as_text=True))
    self.assertIn("solutions", payload)

    solutions = payload["solutions"]
    self.assertEqual(len(solutions), 1)

    solution = solutions[0]
    self.assertIn("note_text", solution)

    version_release = "%s-%s" % (fix_build.version, fix_build.release)
    self.assertIn(version_release, solution["note_text"])
def _create_problems(self, db, problemplugin, report_min_count=0, speedup=False):
    """
    Group the reports of one problem plugin into problems.

    Reports are converted to satyr reports, clustered, and every resulting
    group of reports is attached to a problem through the db session.

    db -- storage object; its `session` is used for add/flush/expire.
    problemplugin -- plugin whose `name` selects the reports and whose
        `_db_report_to_satyr` converts them.
    report_min_count -- forwarded as `min_count` to the report queries;
        when greater than zero, problems are additionally removed from
        low count reports at the end.
    speedup -- when true, work only with reports returned by
        `get_reports_for_problems` plus `get_unassigned_reports`, keep
        already assigned reports where they are and attach the remaining
        ones to an existing or a newly created problem. Otherwise the
        assignment is driven by `_iter_problems`.
    """
    if speedup:
        db_reports = get_reports_for_problems(db, problemplugin.name)
        db_reports += get_unassigned_reports(db, problemplugin.name,
                                             min_count=report_min_count)
    else:
        db_reports = get_reports_by_type(db, problemplugin.name,
                                         min_count=report_min_count)

    db_problems = get_problems(db)

    # problem id -> db problem
    self.log_debug("Creating problem reuse dict")
    problem_by_id = {known.id: known for known in db_problems}

    # problem id -> ids of the reports currently assigned to it
    report_ids_by_problem = defaultdict(list)
    for db_report in db_reports:
        if db_report.problem_id is None:
            continue
        report_ids_by_problem[db_report.problem_id].append(db_report.id)

    # sorted tuple of report ids -> problem id, used to reuse problems
    problem_id_by_reports = {
        tuple(sorted(report_ids)): problem_id
        for problem_id, report_ids in report_ids_by_problem.items()
    }

    stale_report_ids = []
    problems = []

    if not db_reports:
        self.log_info("No reports found")
    elif len(db_reports) == 1:
        only_report = db_reports[0]
        if only_report.problem is None:
            problems.append([only_report])
    else:
        report_by_satyr = {}
        satyr_reports = []
        total_reports = len(db_reports)

        for position, db_report in enumerate(db_reports, start=1):
            self.log_debug("[{0} / {1}] Loading report #{2}"
                           .format(position, total_reports, db_report.id))

            satyr_report = problemplugin._db_report_to_satyr(db_report)
            if satyr_report is not None:
                satyr_reports.append(satyr_report)
                report_by_satyr[satyr_report] = db_report
            else:
                self.log_debug("Unable to create satyr report")
                # An unconvertible report must not stay attached to a problem
                if db_report.problem_id is not None:
                    stale_report_ids.append(db_report.id)

            db.session.expire(db_report)

        self.log_debug("Clustering")
        clusters = self._create_clusters(satyr_reports, 2000)
        # Threads that share no function with another thread
        unique_func_threads = set(satyr_reports) - set().union(*clusters)

        dendrograms = []
        total_clusters = len(clusters)
        for position, cluster in enumerate(clusters, start=1):
            self.log_debug("[{0} / {1}] Computing distances"
                           .format(position, total_clusters))
            distances = satyr.Distances(cluster, len(cluster))

            self.log_debug("Getting dendrogram")
            dendrograms.append(satyr.Dendrogram(distances))

        # Every group of duplicates returned by the cut becomes one problem
        for dendrogram, cluster in zip(dendrograms, clusters):
            cluster_problems = [
                {report_by_satyr[cluster[dup]] for dup in dups}
                for dups in dendrogram.cut(0.3, 1)
            ]
            problems.extend(cluster_problems)

        # Unique threads form their own unique problems
        for thread in unique_func_threads:
            problems.append({report_by_satyr[thread]})

    self.log_info("Creating problems from clusters")
    if speedup:
        for problem in problems:
            if not problem:
                continue

            first_report = next(iter(problem))

            if len(problem) <= 1:
                # A lone report that is already assigned needs no work
                if first_report.problem_id:
                    continue

                # One report that wasn't matched with anything else
                lone_problem = Problem()
                lone_problem.first_occurrence = first_report.first_occurrence
                lone_problem.last_occurrence = first_report.last_occurrence
                db.session.add(lone_problem)
                db.session.flush()

                self.update_comps(db, {first_report.component: 1}, lone_problem)
                first_report.problem_id = lone_problem.id
                continue

            # Find assigned report (the last one seen wins)
            origin_report = None
            for candidate in problem:
                if candidate.problem_id:
                    origin_report = candidate

            comps = {}
            if not origin_report:
                # Problem created only from new reports
                fresh_problem = Problem()
                db.session.add(fresh_problem)
                db.session.flush()

                earliest = first_report.first_occurrence
                latest = first_report.last_occurrence
                for member in problem:
                    member.problem_id = fresh_problem.id

                    if earliest > member.first_occurrence:
                        earliest = member.first_occurrence
                    if latest < member.last_occurrence:
                        latest = member.last_occurrence

                    comps[member.component] = comps.get(member.component, 0) + 1

                self.update_comps(db, comps, fresh_problem)
                fresh_problem.last_occurrence = latest
                fresh_problem.first_occurrence = earliest
            else:
                # Attach the unassigned reports to the origin's problem
                earliest = origin_report.first_occurrence
                latest = origin_report.last_occurrence
                for member in problem:
                    if not member.problem_id:
                        member.problem_id = origin_report.problem_id

                    if earliest > member.first_occurrence:
                        earliest = member.first_occurrence
                    if latest < member.last_occurrence:
                        latest = member.last_occurrence

                    comps[member.component] = comps.get(member.component, 0) + 1

                origin_problem = get_problem_by_id(db, origin_report.problem_id)
                self.update_comps(db, comps, origin_problem)
                origin_problem.last_occurrence = latest
                origin_problem.first_occurrence = earliest

        db.session.flush()
    else:
        assignments = self._iter_problems(db, problems, db_problems,
                                          problem_by_id, problem_id_by_reports)
        for problem, db_problem, reports_changed in assignments:
            comps = {}
            newest = None
            oldest = None

            for db_report in problem:
                db_report.problem = db_problem

                if newest is None or newest < db_report.last_occurrence:
                    newest = db_report.last_occurrence

                if oldest is None or oldest > db_report.first_occurrence:
                    oldest = db_report.first_occurrence

                comps[db_report.component] = comps.get(db_report.component, 0) + 1

            # In case nothing changed, we don't want to mark db_problem
            # dirty which would cause another UPDATE
            if db_problem.first_occurrence != oldest:
                db_problem.first_occurrence = oldest
            if db_problem.last_occurrence != newest:
                db_problem.last_occurrence = newest

            if reports_changed:
                self.update_comps(db, comps, db_problem)

    self.log_debug("Removing {0} invalid reports from problems"
                   .format(len(stale_report_ids)))
    for report_id in stale_report_ids:
        stale_report = get_report_by_id(db, report_id)
        if stale_report is None:
            continue
        stale_report.problem_id = None
        db.session.add(stale_report)

    if report_min_count > 0:
        self.log_debug("Removing problems from low count reports")
        remove_problem_from_low_count_reports_by_type(db,
                                                      problemplugin.name,
                                                      min_count=report_min_count)

    self.log_debug("Flushing session")
    db.session.flush()
def _create_problems(
        self, db, problemplugin, #pylint: disable=too-many-statements
        report_min_count=0, speedup=False):
    """
    Group the reports of one problem plugin into problems.

    Reports are converted to satyr reports by a pool of worker threads,
    clustered, and every resulting group of reports is attached to a
    problem through the db session.

    db -- storage object; its `session` is used for add/flush/expire_all.
    problemplugin -- plugin whose `name` selects the reports and whose
        `db_report_to_satyr` converts them.
    report_min_count -- forwarded as `min_count` to the report queries;
        when greater than zero, problems are additionally removed from
        low count reports at the end.
    speedup -- when true, work only with reports returned by
        `get_reports_for_problems` plus `get_unassigned_reports`, cut the
        dendrograms at a level 1.1 times higher, keep already assigned
        reports where they are and attach the remaining ones to an
        existing or a newly created problem. Otherwise the assignment is
        driven by `_iter_problems`.
    """
    plugin_name = problemplugin.name

    if speedup:
        self.log_debug("[%s] Getting reports for problems", plugin_name)
        db_reports = get_reports_for_problems(db, plugin_name)

        self.log_debug("[%s] Getting unassigned reports", plugin_name)
        db_reports += get_unassigned_reports(db, plugin_name,
                                             min_count=report_min_count)
    else:
        db_reports = get_reports_by_type(db, plugin_name,
                                         min_count=report_min_count)

    db_problems = get_problems(db)

    # problem id -> db problem
    self.log_debug("Creating problem reuse dict")
    problem_by_id = {known.id: known for known in db_problems}

    # problem id -> ids of the reports currently assigned to it
    report_ids_by_problem = defaultdict(list)
    for db_report in db_reports:
        if db_report.problem_id is None:
            continue
        report_ids_by_problem[db_report.problem_id].append(db_report.id)

    # sorted tuple of report ids -> problem id, used to reuse problems
    problem_id_by_reports = {
        tuple(sorted(report_ids)): problem_id
        for problem_id, report_ids in report_ids_by_problem.items()
    }

    stale_report_ids = []
    problems = []

    if not db_reports:
        self.log_info("No reports found")
    elif len(db_reports) == 1:
        only_report = db_reports[0]
        if only_report.problem is None:
            problems.append([only_report])
    else:
        report_by_satyr = {}
        satyr_reports = []
        total_reports = len(db_reports)

        # split the work to multiple workers
        with ThreadPoolExecutor(self._max_workers) as executor:
            # schedule db_reports for processing
            futures = {}
            for report in db_reports:
                job = executor.submit(problemplugin.db_report_to_satyr, report)
                futures[job] = report

            finished = as_completed(futures)
            for n_processed, future in enumerate(finished, start=1):
                # Drop the bookkeeping entry as soon as the job is consumed
                db_report = futures.pop(future)
                self.log_debug("[%d / %d] Loading report #%d",
                               n_processed, total_reports, db_report.id)

                satyr_report = future.result()
                if satyr_report is not None:
                    satyr_reports.append(satyr_report)
                    report_by_satyr[satyr_report] = db_report
                else:
                    self.log_debug("Unable to create satyr report")
                    # An unconvertible report must not stay attached
                    # to a problem
                    if db_report.problem_id is not None:
                        stale_report_ids.append(db_report.id)

        db.session.expire_all()

        self.log_debug("Clustering")
        clusters = self._create_clusters(satyr_reports, 2000)
        # Threads that share no function with another thread
        unique_func_threads = set(satyr_reports) - set().union(*clusters)

        dendrograms = []
        total_clusters = len(clusters)
        for position, cluster in enumerate(clusters, start=1):
            self.log_debug("[%d / %d] Computing distances",
                           position, total_clusters)
            distances = satyr.Distances(cluster, len(cluster))

            self.log_debug("Getting dendrogram")
            dendrograms.append(satyr.Dendrogram(distances))

        # The speedup mode cuts at a slightly higher level
        cut_level = 0.3
        if speedup:
            cut_level = cut_level * 1.1

        # Every group of duplicates returned by the cut becomes one problem
        for dendrogram, cluster in zip(dendrograms, clusters):
            cluster_problems = [
                {report_by_satyr[cluster[dup]] for dup in dups}
                for dups in dendrogram.cut(cut_level, 1)
            ]
            problems.extend(cluster_problems)

        # Unique threads form their own unique problems
        for thread in unique_func_threads:
            problems.append({report_by_satyr[thread]})

    self.log_info("Creating problems from clusters")
    if speedup:
        for problem in problems:
            if not problem:
                continue

            first_report = next(iter(problem))

            if len(problem) <= 1:
                # A lone report that is already assigned needs no work
                if first_report.problem_id:
                    continue

                # One report that wasn't matched with anything else
                lone_problem = Problem()
                lone_problem.first_occurrence = first_report.first_occurrence
                lone_problem.last_occurrence = first_report.last_occurrence
                db.session.add(lone_problem)
                db.session.flush()

                self.update_comps(db, {first_report.component: 1}, lone_problem)
                first_report.problem_id = lone_problem.id
                continue

            # Find assigned report (the last one seen wins)
            origin_report = None
            for candidate in problem:
                if candidate.problem_id:
                    origin_report = candidate

            comps = {}
            if not origin_report:
                # Problem created only from new reports
                fresh_problem = Problem()
                db.session.add(fresh_problem)
                db.session.flush()

                earliest = first_report.first_occurrence
                latest = first_report.last_occurrence
                for member in problem:
                    member.problem_id = fresh_problem.id

                    if earliest > member.first_occurrence:
                        earliest = member.first_occurrence
                    if latest < member.last_occurrence:
                        latest = member.last_occurrence

                    comps[member.component] = comps.get(member.component, 0) + 1

                self.update_comps(db, comps, fresh_problem)
                fresh_problem.last_occurrence = latest
                fresh_problem.first_occurrence = earliest
            else:
                # Attach the unassigned reports to the origin's problem
                earliest = origin_report.first_occurrence
                latest = origin_report.last_occurrence
                for member in problem:
                    if not member.problem_id:
                        member.problem_id = origin_report.problem_id

                    if earliest > member.first_occurrence:
                        earliest = member.first_occurrence
                    if latest < member.last_occurrence:
                        latest = member.last_occurrence

                    comps[member.component] = comps.get(member.component, 0) + 1

                origin_problem = get_problem_by_id(db, origin_report.problem_id)
                self.update_comps(db, comps, origin_problem)
                origin_problem.last_occurrence = latest
                origin_problem.first_occurrence = earliest

        db.session.flush()
    else:
        assignments = self._iter_problems(db, problems, db_problems,
                                          problem_by_id, problem_id_by_reports)
        for problem, db_problem, reports_changed in assignments:
            comps = {}
            newest = None
            oldest = None

            for db_report in problem:
                db_report.problem = db_problem

                if newest is None or newest < db_report.last_occurrence:
                    newest = db_report.last_occurrence

                if oldest is None or oldest > db_report.first_occurrence:
                    oldest = db_report.first_occurrence

                comps[db_report.component] = comps.get(db_report.component, 0) + 1

            # In case nothing changed, we don't want to mark db_problem
            # dirty which would cause another UPDATE
            if db_problem.first_occurrence != oldest:
                db_problem.first_occurrence = oldest
            if db_problem.last_occurrence != newest:
                db_problem.last_occurrence = newest

            if reports_changed:
                self.update_comps(db, comps, db_problem)

    self.log_debug("Removing %d invalid reports from problems",
                   len(stale_report_ids))
    unassign_reports(db, stale_report_ids)

    if report_min_count > 0:
        self.log_debug("Removing problems from low count reports")
        remove_problem_from_low_count_reports_by_type(
            db, plugin_name, min_count=report_min_count)

    self.log_debug("Flushing session")
    db.session.flush()