示例#1
0
    def run(self, cmdline, db):
        if cmdline.problemtype is None or len(cmdline.problemtype) < 1:
            ptypes = list(problemtypes.keys())
        else:
            ptypes = []
            for ptype in cmdline.problemtype:
                if ptype not in problemtypes:
                    self.log_warn(
                        "Problem type '{0}' is not supported".format(ptype))
                    continue

                ptypes.append(ptype)

        if not ptypes:
            self.log_info("Nothing to do")
            return 0

        i = 0
        for ptype in ptypes:
            i += 1

            problemtype = problemtypes[ptype]

            self.log_info("[{0} / {1}] Processing problem type '{2}'".format(
                i, len(ptypes), problemtype.nice_name))
            db_reports = get_reports_by_type(db, ptype)

            j = 0
            for db_report in db_reports:
                j += 1

                self.log_info("  [{0} / {1}] Processing report #{2}".format(
                    j, len(db_reports), db_report.id))

                hashes = set()
                k = 0
                for db_backtrace in db_report.backtraces:
                    k += 1

                    self.log_debug(
                        "    [{0} / {1}] Processing backtrace #{2}".format(
                            k, len(db_report.backtraces), db_backtrace.id))
                    try:
                        component = db_report.component.name
                        include_offset = ptype.lower() == "python"
                        bthash = self._hash_backtrace(db_backtrace,
                                                      hashbase=[component],
                                                      offset=include_offset)
                        self.log_debug("    {0}".format(bthash))
                        db_dup = get_report(db, bthash)
                        if db_dup is None:
                            self.log_info(
                                "    Adding hash '{0}'".format(bthash))
                            if not bthash in hashes:
                                db_reporthash = ReportHash()
                                db_reporthash.report = db_report
                                db_reporthash.hash = bthash
                                db.session.add(db_reporthash)
                                hashes.add(bthash)
                        elif db_dup == db_report:
                            self.log_debug(
                                "    Hash '{0}' already assigned".format(
                                    bthash))
                        else:
                            self.log_warn(
                                ("    Conflict! Skipping hash '{0}'"
                                 " (report #{1})").format(bthash, db_dup.id))
                    except FafError as ex:
                        self.log_warn("    {0}".format(str(ex)))
                        continue

                db.session.flush()
        return 0
示例#2
0
    def _create_problems(self, db, problemplugin,
                         report_min_count=0, speedup=False):
        if speedup:
            db_reports = get_reports_for_problems(db, problemplugin.name)
            db_reports += get_unassigned_reports(db, problemplugin.name,
                                                 min_count=report_min_count)
        else:
            db_reports = get_reports_by_type(db, problemplugin.name,
                                             min_count=report_min_count)
        db_problems = get_problems(db)

        # dict to get db_problem by problem_id
        self.log_debug("Creating problem reuse dict")
        problems_dict = {}
        for db_problem in db_problems:
            problems_dict[db_problem.id] = db_problem
        # dict to get report_ids by problem_id
        problem_report = defaultdict(list)
        for db_report in db_reports:
            if db_report.problem_id is not None:
                problem_report[db_report.problem_id].append(db_report.id)
        # create lookup dict for problems
        reuse_problems = {}
        for (problem_id, report_ids) in problem_report.items():
            reuse_problems[tuple(sorted(report_ids))] = problem_id

        invalid_report_ids_to_clean = []
        problems = []
        if not db_reports:
            self.log_info("No reports found")
        elif len(db_reports) == 1:
            db_report = db_reports[0]
            if db_report.problem is None:
                problems.append([db_report])
        else:
            report_map = {}
            _satyr_reports = []
            i = 0
            for db_report in db_reports:
                i += 1
                self.log_debug("[{0} / {1}] Loading report #{2}"
                               .format(i, len(db_reports), db_report.id))

                _satyr_report = problemplugin._db_report_to_satyr(db_report)
                if _satyr_report is None:
                    self.log_debug("Unable to create satyr report")
                    if db_report.problem_id is not None:
                        invalid_report_ids_to_clean.append(db_report.id)
                else:
                    _satyr_reports.append(_satyr_report)
                    report_map[_satyr_report] = db_report

                db.session.expire(db_report)

            self.log_debug("Clustering")
            clusters = self._create_clusters(_satyr_reports, 2000)
            # Threads that share no function with another thread
            unique_func_threads = set(_satyr_reports) - set().union(*clusters)

            dendrograms = []
            i = 0
            for cluster in clusters:
                i += 1
                self.log_debug("[{0} / {1}] Computing distances"
                               .format(i, len(clusters)))
                distances = satyr.Distances(cluster, len(cluster))

                self.log_debug("Getting dendrogram")
                dendrograms.append(satyr.Dendrogram(distances))

            for dendrogram, cluster in zip(dendrograms, clusters):
                problem = []
                for dups in dendrogram.cut(0.3, 1):
                    reports = set(report_map[cluster[dup]] for dup in dups)
                    problem.append(reports)

                problems.extend(problem)

            # Unique threads form their own unique problems
            for thread in unique_func_threads:
                problems.append({report_map[thread]})

        self.log_info("Creating problems from clusters")
        if speedup:
            for problem in problems:
                if not problem:
                    continue
                first_report = next(iter(problem))
                if len(problem) > 1:
                    # Find assigned report
                    origin_report = None
                    for db_report in problem:
                        if db_report.problem_id:
                            origin_report = db_report

                    # Problem created only from new reports
                    comps = {}
                    if not origin_report:
                        new = Problem()
                        db.session.add(new)
                        db.session.flush()
                        first_occurrence = first_report.first_occurrence
                        last_occurrence = first_report.last_occurrence
                        for rep in problem:
                            rep.problem_id = new.id

                            if first_occurrence > rep.first_occurrence:
                                first_occurrence = rep.first_occurrence
                            if last_occurrence < rep.last_occurrence:
                                last_occurrence = rep.last_occurrence

                            if rep.component not in comps:
                                comps[rep.component] = 0

                            comps[rep.component] += 1
                        self.update_comps(db, comps, new)
                        new.last_occurrence = last_occurrence
                        new.first_occurrence = first_occurrence

                    else:
                        first_occurrence = origin_report.first_occurrence
                        last_occurrence = origin_report.last_occurrence
                        for rep in problem:
                            if not rep.problem_id:
                                rep.problem_id = origin_report.problem_id

                                if first_occurrence > rep.first_occurrence:
                                    first_occurrence = rep.first_occurrence
                                if last_occurrence < rep.last_occurrence:
                                    last_occurrence = rep.last_occurrence

                                if rep.component not in comps:
                                    comps[rep.component] = 0

                                comps[rep.component] += 1
                        orig_p = get_problem_by_id(db, origin_report.problem_id)
                        self.update_comps(db, comps, orig_p)
                        orig_p.last_occurrence = last_occurrence
                        orig_p.first_occurrence = first_occurrence
                else:
                    # The report is assigned
                    if first_report.problem_id:
                        continue
                    else:
                        # One report that wasn't matched with anything else
                        new = Problem()
                        new.first_occurrence = first_report.first_occurrence
                        new.last_occurrence = first_report.last_occurrence
                        db.session.add(new)
                        db.session.flush()

                        self.update_comps(db, {first_report.component: 1}, new)
                        first_report.problem_id = new.id
            db.session.flush()

        else:
            for problem, db_problem, reports_changed in self._iter_problems(
                    db, problems, db_problems, problems_dict, reuse_problems):

                comps = {}

                problem_last_occurrence = None
                problem_first_occurrence = None
                for db_report in problem:
                    db_report.problem = db_problem

                    if (problem_last_occurrence is None or
                            problem_last_occurrence < db_report.last_occurrence):
                        problem_last_occurrence = db_report.last_occurrence

                    if (problem_first_occurrence is None or
                            problem_first_occurrence > db_report.first_occurrence):
                        problem_first_occurrence = db_report.first_occurrence

                    if db_report.component not in comps:
                        comps[db_report.component] = 0

                    comps[db_report.component] += 1

                # In case nothing changed, we don't want to mark db_problem
                # dirty which would cause another UPDATE
                if db_problem.first_occurrence != problem_first_occurrence:
                    db_problem.first_occurrence = problem_first_occurrence
                if db_problem.last_occurrence != problem_last_occurrence:
                    db_problem.last_occurrence = problem_last_occurrence

                if reports_changed:
                    self.update_comps(db, comps, db_problem)

            self.log_debug("Removing {0} invalid reports from problems"
                           .format(len(invalid_report_ids_to_clean)))
            for report_id in invalid_report_ids_to_clean:
                db_report = get_report_by_id(db, report_id)
                if db_report is not None:
                    db_report.problem_id = None
                    db.session.add(db_report)

            if report_min_count > 0:
                self.log_debug("Removing problems from low count reports")
                remove_problem_from_low_count_reports_by_type(db,
                                                              problemplugin.name,
                                                              min_count=report_min_count)

            self.log_debug("Flushing session")
            db.session.flush()
示例#3
0
    def _create_problems(self, db, problemplugin,
                         report_min_count=0, speedup=False):
        if speedup:
            db_reports = get_reports_for_problems(db, problemplugin.name)
            db_reports += get_unassigned_reports(db, problemplugin.name,
                                                 min_count=report_min_count)
        else:
            db_reports = get_reports_by_type(db, problemplugin.name,
                                             min_count=report_min_count)
        db_problems = get_problems(db)

        # dict to get db_problem by problem_id
        self.log_debug("Creating problem reuse dict")
        problems_dict = {}
        for db_problem in db_problems:
            problems_dict[db_problem.id] = db_problem
        # dict to get report_ids by problem_id
        problem_report = defaultdict(list)
        for db_report in db_reports:
            if db_report.problem_id is not None:
                problem_report[db_report.problem_id].append(db_report.id)
        # create lookup dict for problems
        reuse_problems = {}
        for (problem_id, report_ids) in problem_report.items():
            reuse_problems[tuple(sorted(report_ids))] = problem_id

        invalid_report_ids_to_clean = []
        problems = []
        if not db_reports:
            self.log_info("No reports found")
        elif len(db_reports) == 1:
            db_report = db_reports[0]
            if db_report.problem is None:
                problems.append([db_report])
        else:
            report_map = {}
            _satyr_reports = []
            i = 0
            for db_report in db_reports:
                i += 1
                self.log_debug("[{0} / {1}] Loading report #{2}"
                               .format(i, len(db_reports), db_report.id))

                _satyr_report = problemplugin._db_report_to_satyr(db_report)
                if _satyr_report is None:
                    self.log_debug("Unable to create satyr report")
                    if db_report.problem_id is not None:
                        invalid_report_ids_to_clean.append(db_report.id)
                else:
                    _satyr_reports.append(_satyr_report)
                    report_map[_satyr_report] = db_report

                db.session.expire(db_report)

            self.log_debug("Clustering")
            clusters = self._create_clusters(_satyr_reports, 2000)
            # Threads that share no function with another thread
            unique_func_threads = set(_satyr_reports) - set().union(*clusters)

            dendrograms = []
            i = 0
            for cluster in clusters:
                i += 1
                self.log_debug("[{0} / {1}] Computing distances"
                               .format(i, len(clusters)))
                distances = satyr.Distances(cluster, len(cluster))

                self.log_debug("Getting dendrogram")
                dendrograms.append(satyr.Dendrogram(distances))

            for dendrogram, cluster in zip(dendrograms, clusters):
                problem = []
                for dups in dendrogram.cut(0.3, 1):
                    reports = set(report_map[cluster[dup]] for dup in dups)
                    problem.append(reports)

                problems.extend(problem)

            # Unique threads form their own unique problems
            for thread in unique_func_threads:
                problems.append({report_map[thread]})

        self.log_info("Creating problems from clusters")
        if speedup:
            for problem in problems:
                if not problem:
                    continue
                first_report = next(iter(problem))
                if len(problem) > 1:
                    # Find assigned report
                    origin_report = None
                    for db_report in problem:
                        if db_report.problem_id:
                            origin_report = db_report

                    # Problem created only from new reports
                    comps = {}
                    if not origin_report:
                        new = Problem()
                        db.session.add(new)
                        db.session.flush()
                        first_occurrence = first_report.first_occurrence
                        last_occurrence = first_report.last_occurrence
                        for rep in problem:
                            rep.problem_id = new.id

                            if first_occurrence > rep.first_occurrence:
                                first_occurrence = rep.first_occurrence
                            if last_occurrence < rep.last_occurrence:
                                last_occurrence = rep.last_occurrence

                            if rep.component not in comps:
                                comps[rep.component] = 0

                            comps[rep.component] += 1
                        self.update_comps(db, comps, new)
                        new.last_occurrence = last_occurrence
                        new.first_occurrence = first_occurrence

                    else:
                        first_occurrence = origin_report.first_occurrence
                        last_occurrence = origin_report.last_occurrence
                        for rep in problem:
                            if not rep.problem_id:
                                rep.problem_id = origin_report.problem_id

                                if first_occurrence > rep.first_occurrence:
                                    first_occurrence = rep.first_occurrence
                                if last_occurrence < rep.last_occurrence:
                                    last_occurrence = rep.last_occurrence

                                if rep.component not in comps:
                                    comps[rep.component] = 0

                                comps[rep.component] += 1
                        orig_p = get_problem_by_id(db, origin_report.problem_id)
                        self.update_comps(db, comps, orig_p)
                        orig_p.last_occurrence = last_occurrence
                        orig_p.first_occurrence = first_occurrence
                else:
                    # The report is assigned
                    if first_report.problem_id:
                        continue
                    else:
                        # One report that wasn't matched with anything else
                        new = Problem()
                        new.first_occurrence = first_report.first_occurrence
                        new.last_occurrence = first_report.last_occurrence
                        db.session.add(new)
                        db.session.flush()

                        self.update_comps(db, {first_report.component: 1}, new)
                        first_report.problem_id = new.id
            db.session.flush()

        else:
            for problem, db_problem, reports_changed in self._iter_problems(
                    db, problems, db_problems, problems_dict, reuse_problems):

                comps = {}

                problem_last_occurrence = None
                problem_first_occurrence = None
                for db_report in problem:
                    db_report.problem = db_problem

                    if (problem_last_occurrence is None or
                            problem_last_occurrence < db_report.last_occurrence):
                        problem_last_occurrence = db_report.last_occurrence

                    if (problem_first_occurrence is None or
                            problem_first_occurrence > db_report.first_occurrence):
                        problem_first_occurrence = db_report.first_occurrence

                    if db_report.component not in comps:
                        comps[db_report.component] = 0

                    comps[db_report.component] += 1

                # In case nothing changed, we don't want to mark db_problem
                # dirty which would cause another UPDATE
                if db_problem.first_occurrence != problem_first_occurrence:
                    db_problem.first_occurrence = problem_first_occurrence
                if db_problem.last_occurrence != problem_last_occurrence:
                    db_problem.last_occurrence = problem_last_occurrence

                if reports_changed:
                    self.update_comps(db, comps, db_problem)

            self.log_debug("Removing {0} invalid reports from problems"
                           .format(len(invalid_report_ids_to_clean)))
            for report_id in invalid_report_ids_to_clean:
                db_report = get_report_by_id(db, report_id)
                if db_report is not None:
                    db_report.problem_id = None
                    db.session.add(db_report)

            if report_min_count > 0:
                self.log_debug("Removing problems from low count reports")
                remove_problem_from_low_count_reports_by_type(db,
                                                              problemplugin.name,
                                                              min_count=report_min_count)

            self.log_debug("Flushing session")
            db.session.flush()
示例#4
0
    def _create_problems(
            self,
            db,
            problemplugin,  #pylint: disable=too-many-statements
            report_min_count=0,
            speedup=False):
        if speedup:
            self.log_debug("[%s] Getting reports for problems",
                           problemplugin.name)
            db_reports = get_reports_for_problems(db, problemplugin.name)

            self.log_debug("[%s] Getting unassigned reports",
                           problemplugin.name)
            db_reports += get_unassigned_reports(db,
                                                 problemplugin.name,
                                                 min_count=report_min_count)
        else:
            db_reports = get_reports_by_type(db,
                                             problemplugin.name,
                                             min_count=report_min_count)
        db_problems = get_problems(db)

        # dict to get db_problem by problem_id
        self.log_debug("Creating problem reuse dict")
        problems_dict = {}
        for db_problem in db_problems:
            problems_dict[db_problem.id] = db_problem
        # dict to get report_ids by problem_id
        problem_report = defaultdict(list)
        for db_report in db_reports:
            if db_report.problem_id is not None:
                problem_report[db_report.problem_id].append(db_report.id)
        # create lookup dict for problems
        reuse_problems = {}
        for (problem_id, report_ids) in problem_report.items():
            reuse_problems[tuple(sorted(report_ids))] = problem_id

        invalid_report_ids_to_clean = []
        problems = []
        if not db_reports:
            self.log_info("No reports found")
        elif len(db_reports) == 1:
            db_report = db_reports[0]
            if db_report.problem is None:
                problems.append([db_report])
        else:
            report_map = {}
            _satyr_reports = []
            db_reports_len = len(db_reports)
            n_processed = 1

            # split the work to multiple workers
            with ThreadPoolExecutor(self._max_workers) as executor:
                # schedule db_reports for processing
                futures = {
                    executor.submit(problemplugin.db_report_to_satyr, report):
                    report
                    for report in db_reports
                }

                for future in as_completed(futures):
                    db_report = futures.pop(future)
                    self.log_debug("[%d / %d] Loading report #%d", n_processed,
                                   db_reports_len, db_report.id)

                    _satyr_report = future.result()
                    if _satyr_report is None:
                        self.log_debug("Unable to create satyr report")
                        if db_report.problem_id is not None:
                            invalid_report_ids_to_clean.append(db_report.id)
                    else:
                        _satyr_reports.append(_satyr_report)
                        report_map[_satyr_report] = db_report

                    n_processed += 1

                db.session.expire_all()

            self.log_debug("Clustering")
            clusters = self._create_clusters(_satyr_reports, 2000)
            # Threads that share no function with another thread
            unique_func_threads = set(_satyr_reports) - set().union(*clusters)

            dendrograms = []
            clusters_len = len(clusters)
            for i, cluster in enumerate(clusters, start=1):
                self.log_debug("[%d / %d] Computing distances", i,
                               clusters_len)
                distances = satyr.Distances(cluster, len(cluster))

                self.log_debug("Getting dendrogram")
                dendrograms.append(satyr.Dendrogram(distances))

            dendogram_cut = 0.3
            if speedup:
                dendogram_cut = dendogram_cut * 1.1

            for dendrogram, cluster in zip(dendrograms, clusters):
                problem = []
                for dups in dendrogram.cut(dendogram_cut, 1):
                    reports = set(report_map[cluster[dup]] for dup in dups)
                    problem.append(reports)

                problems.extend(problem)

            # Unique threads form their own unique problems
            for thread in unique_func_threads:
                problems.append({report_map[thread]})

        self.log_info("Creating problems from clusters")
        if speedup:
            for problem in problems:
                if not problem:
                    continue
                first_report = next(iter(problem))
                if len(problem) > 1:
                    # Find assigned report
                    origin_report = None
                    for db_report in problem:
                        if db_report.problem_id:
                            origin_report = db_report

                    # Problem created only from new reports
                    comps = {}
                    if not origin_report:
                        new = Problem()
                        db.session.add(new)
                        db.session.flush()
                        first_occurrence = first_report.first_occurrence
                        last_occurrence = first_report.last_occurrence
                        for rep in problem:
                            rep.problem_id = new.id

                            if first_occurrence > rep.first_occurrence:
                                first_occurrence = rep.first_occurrence
                            if last_occurrence < rep.last_occurrence:
                                last_occurrence = rep.last_occurrence

                            if rep.component not in comps:
                                comps[rep.component] = 0

                            comps[rep.component] += 1
                        self.update_comps(db, comps, new)
                        new.last_occurrence = last_occurrence
                        new.first_occurrence = first_occurrence

                    else:
                        first_occurrence = origin_report.first_occurrence
                        last_occurrence = origin_report.last_occurrence
                        for rep in problem:
                            if not rep.problem_id:
                                rep.problem_id = origin_report.problem_id

                                if first_occurrence > rep.first_occurrence:
                                    first_occurrence = rep.first_occurrence
                                if last_occurrence < rep.last_occurrence:
                                    last_occurrence = rep.last_occurrence

                                if rep.component not in comps:
                                    comps[rep.component] = 0

                                comps[rep.component] += 1
                        orig_p = get_problem_by_id(db,
                                                   origin_report.problem_id)
                        self.update_comps(db, comps, orig_p)
                        orig_p.last_occurrence = last_occurrence
                        orig_p.first_occurrence = first_occurrence
                else:
                    # The report is assigned
                    if first_report.problem_id:
                        continue
                    # One report that wasn't matched with anything else
                    new = Problem()
                    new.first_occurrence = first_report.first_occurrence
                    new.last_occurrence = first_report.last_occurrence
                    db.session.add(new)
                    db.session.flush()

                    self.update_comps(db, {first_report.component: 1}, new)
                    first_report.problem_id = new.id
            db.session.flush()

        else:
            for problem, db_problem, reports_changed in self._iter_problems(
                    db, problems, db_problems, problems_dict, reuse_problems):

                comps = {}

                problem_last_occurrence = None
                problem_first_occurrence = None
                for db_report in problem:
                    db_report.problem = db_problem

                    if (problem_last_occurrence is None
                            or problem_last_occurrence <
                            db_report.last_occurrence):
                        problem_last_occurrence = db_report.last_occurrence

                    if (problem_first_occurrence is None
                            or problem_first_occurrence >
                            db_report.first_occurrence):
                        problem_first_occurrence = db_report.first_occurrence

                    if db_report.component not in comps:
                        comps[db_report.component] = 0

                    comps[db_report.component] += 1

                # In case nothing changed, we don't want to mark db_problem
                # dirty which would cause another UPDATE
                if db_problem.first_occurrence != problem_first_occurrence:
                    db_problem.first_occurrence = problem_first_occurrence
                if db_problem.last_occurrence != problem_last_occurrence:
                    db_problem.last_occurrence = problem_last_occurrence

                if reports_changed:
                    self.update_comps(db, comps, db_problem)

            self.log_debug("Removing %d invalid reports from problems",
                           len(invalid_report_ids_to_clean))
            unassign_reports(db, invalid_report_ids_to_clean)

            if report_min_count > 0:
                self.log_debug("Removing problems from low count reports")
                remove_problem_from_low_count_reports_by_type(
                    db, problemplugin.name, min_count=report_min_count)

            self.log_debug("Flushing session")
            db.session.flush()
示例#5
0
    def run(self, cmdline, db):
        if cmdline.problemtype is None or len(cmdline.problemtype) < 1:
            ptypes = problemtypes.keys()
        else:
            ptypes = []
            for ptype in cmdline.problemtype:
                if ptype not in problemtypes:
                    self.log_warn("Problem type '{0}' is not supported"
                                  .format(ptype))
                    continue

                ptypes.append(ptype)

        if len(ptypes) < 1:
            self.log_info("Nothing to do")
            return 0

        i = 0
        for ptype in ptypes:
            i += 1

            problemtype = problemtypes[ptype]

            self.log_info("[{0} / {1}] Processing problem type '{2}'"
                          .format(i, len(ptypes), problemtype.nice_name))
            db_reports = get_reports_by_type(db, ptype)

            j = 0
            for db_report in db_reports:
                j += 1

                self.log_info("  [{0} / {1}] Processing report #{2}"
                              .format(j, len(db_reports), db_report.id))

                hashes = set()
                k = 0
                for db_backtrace in db_report.backtraces:
                    k += 1

                    self.log_debug("    [{0} / {1}] Processing backtrace #{2}"
                                   .format(k, len(db_report.backtraces),
                                           db_backtrace.id))
                    try:
                        component = db_report.component.name
                        include_offset = ptype.lower() == "python"
                        bthash = self._hash_backtrace(db_backtrace,
                                                      hashbase=[component],
                                                      offset=include_offset)
                        self.log_debug("    {0}".format(bthash))
                        db_dup = get_report_by_hash(db, bthash)
                        if db_dup is None:
                            self.log_info("    Adding hash '{0}'"
                                          .format(bthash))
                            if not bthash in hashes:
                                db_reporthash = ReportHash()
                                db_reporthash.report = db_report
                                db_reporthash.hash = bthash
                                db.session.add(db_reporthash)
                                hashes.add(bthash)
                        elif db_dup == db_report:
                            self.log_debug("    Hash '{0}' already assigned"
                                           .format(bthash))
                        else:
                            self.log_warn(("    Conflict! Skipping hash '{0}'"
                                           " (report #{1})").format(bthash,
                                                                    db_dup.id))
                    except FafError as ex:
                        self.log_warn("    {0}".format(str(ex)))
                        continue

                db.session.flush()
示例#6
0
    def _create_problems(self, db, problemplugin):
        db_reports = get_reports_by_type(db, problemplugin.name)
        db_problems = get_problems(db)

        # dict to get db_problem by problem_id
        self.log_debug("Creating problem reuse dict")
        problems_dict = {}
        for db_problem in db_problems:
            problems_dict[db_problem.id] = db_problem
        # dict to get report_ids by problem_id
        problem_report = defaultdict(list)
        for db_report in db_reports:
            if db_report.problem_id is not None:
                problem_report[db_report.problem_id].append(db_report.id)
        # create lookup dict for problems
        reuse_problems = {}
        for (problem_id, report_ids) in problem_report.items():
            reuse_problems[tuple(sorted(report_ids))] = problem_id

        problems = []
        if len(db_reports) < 1:
            self.log_info("No reports found")
        elif len(db_reports) == 1:
            db_report = db_reports[0]
            if db_report.problem is None:
                problems.append([db_report])
        else:
            report_map = {}
            _satyr_reports = []
            i = 0
            for db_report in db_reports:
                i += 1
                self.log_debug("[{0} / {1}] Loading report #{2}"
                               .format(i, len(db_reports), db_report.id))

                _satyr_report = problemplugin._db_report_to_satyr(db_report)
                if _satyr_report is None:
                    self.log_debug("Unable to create satyr report")
                else:
                    _satyr_reports.append(_satyr_report)
                    report_map[_satyr_report] = db_report

                db.session.expire(db_report)

            self.log_debug("Clustering")
            clusters = self._create_clusters(_satyr_reports, 2000)
            unique_func_threads = set(_satyr_reports) - set().union(*clusters)

            dendrograms = []
            i = 0
            for cluster in clusters:
                i += 1
                self.log_debug("[{0} / {1}] Computing distances"
                               .format(i, len(clusters)))
                distances = satyr.Distances(cluster, len(cluster))

                self.log_debug("Getting dendrogram")
                dendrograms.append(satyr.Dendrogram(distances))

            for dendrogram, cluster in zip(dendrograms, clusters):
                problem = []
                for dups in dendrogram.cut(0.3, 1):
                    reports = set(report_map[cluster[dup]] for dup in dups)
                    problem.append(reports)

                problems.extend(problem)

            for thread in unique_func_threads:
                problems.append(set([report_map[thread]]))

        self.log_info("Creating problems")
        i = 0
        lookedup_count = 0
        found_count = 0
        created_count = 0
        for problem in problems:
            i += 1

            self.log_debug("[{0} / {1}] Creating problem"
                           .format(i, len(problems)))
            comps = {}

            reports_changed = True
            problem_id = reuse_problems.get(
                tuple(sorted([db_report.id for db_report in problem])), None)
            if problem_id is not None:
                db_problem = problems_dict.get(problem_id, None)
                reports_changed = False
                lookedup_count += 1
                self.log_debug("Looked up existing problem #{0}"
                               .format(db_problem.id))
            else:
                db_problem = self._find_problem(db_problems, problem)
                found_count += 1

            if db_problem is None:
                db_problem = Problem()
                db.session.add(db_problem)

                db_problems.append(db_problem)
                created_count += 1

            for db_report in problem:
                db_report.problem = db_problem

                if (db_problem.last_occurrence is None or
                    db_problem.last_occurrence < db_report.last_occurrence):
                    db_problem.last_occurrence = db_report.last_occurrence

                if (db_problem.first_occurrence is None or
                    db_problem.first_occurrence < db_report.first_occurrence):
                    db_problem.first_occurrence = db_report.first_occurrence

                if db_report.component not in comps:
                    comps[db_report.component] = 0

                comps[db_report.component] += 1

            if reports_changed:
                db_comps = sorted(comps, key=lambda x: comps[x], reverse=True)

                order = 0
                for db_component in db_comps:
                    order += 1

                    db_pcomp = get_problem_component(db, db_problem, db_component)
                    if db_pcomp is None:
                        db_pcomp = ProblemComponent()
                        db_pcomp.problem = db_problem
                        db_pcomp.component = db_component
                        db_pcomp.order = order
                        db.session.add(db_pcomp)

        self.log_debug("Total: {0}  Looked up: {1}  Found: {2}  Created: {3}"
                       .format(i, lookedup_count, found_count, created_count))
        self.log_debug("Flushing session")
        db.session.flush()
示例#7
0
    def _create_problems(self, db, problemplugin):
        db_reports = get_reports_by_type(db, problemplugin.name)
        db_problems = get_problems(db)

        # dict to get db_problem by problem_id
        self.log_debug("Creating problem reuse dict")
        problems_dict = {}
        for db_problem in db_problems:
            problems_dict[db_problem.id] = db_problem
        # dict to get report_ids by problem_id
        problem_report = defaultdict(list)
        for db_report in db_reports:
            if db_report.problem_id is not None:
                problem_report[db_report.problem_id].append(db_report.id)
        # create lookup dict for problems
        reuse_problems = {}
        for (problem_id, report_ids) in problem_report.items():
            reuse_problems[tuple(sorted(report_ids))] = problem_id

        invalid_report_ids_to_clean = []
        problems = []
        if len(db_reports) < 1:
            self.log_info("No reports found")
        elif len(db_reports) == 1:
            db_report = db_reports[0]
            if db_report.problem is None:
                problems.append([db_report])
        else:
            report_map = {}
            _satyr_reports = []
            i = 0
            for db_report in db_reports:
                i += 1
                self.log_debug("[{0} / {1}] Loading report #{2}"
                               .format(i, len(db_reports), db_report.id))

                _satyr_report = problemplugin._db_report_to_satyr(db_report)
                if _satyr_report is None:
                    self.log_debug("Unable to create satyr report")
                    if db_report.problem_id is not None:
                        invalid_report_ids_to_clean.append(db_report.id)
                else:
                    _satyr_reports.append(_satyr_report)
                    report_map[_satyr_report] = db_report

                db.session.expire(db_report)

            self.log_debug("Clustering")
            clusters = self._create_clusters(_satyr_reports, 2000)
            unique_func_threads = set(_satyr_reports) - set().union(*clusters)

            dendrograms = []
            i = 0
            for cluster in clusters:
                i += 1
                self.log_debug("[{0} / {1}] Computing distances"
                               .format(i, len(clusters)))
                distances = satyr.Distances(cluster, len(cluster))

                self.log_debug("Getting dendrogram")
                dendrograms.append(satyr.Dendrogram(distances))

            for dendrogram, cluster in zip(dendrograms, clusters):
                problem = []
                for dups in dendrogram.cut(0.3, 1):
                    reports = set(report_map[cluster[dup]] for dup in dups)
                    problem.append(reports)

                problems.extend(problem)

            for thread in unique_func_threads:
                problems.append(set([report_map[thread]]))

        self.log_info("Creating problems from clusters")
        for problem, db_problem, reports_changed in self._iter_problems(
                db, problems, db_problems, problems_dict, reuse_problems):

            comps = {}

            problem_last_occurrence = None
            problem_first_occurrence = None
            for db_report in problem:
                db_report.problem = db_problem

                if (problem_last_occurrence is None or
                    problem_last_occurrence < db_report.last_occurrence):
                    problem_last_occurrence = db_report.last_occurrence

                if (problem_first_occurrence is None or
                    problem_first_occurrence > db_report.first_occurrence):
                    problem_first_occurrence = db_report.first_occurrence

                if db_report.component not in comps:
                    comps[db_report.component] = 0

                comps[db_report.component] += 1

            # In case nothing changed, we don't want to mark db_problem dirty
            # which would cause another UPDATE
            if db_problem.first_occurrence != problem_first_occurrence:
                db_problem.first_occurrence = problem_first_occurrence
            if db_problem.last_occurrence != problem_last_occurrence:
                db_problem.last_occurrence = problem_last_occurrence

            if reports_changed:
                db_comps = sorted(comps, key=lambda x: comps[x], reverse=True)

                order = 0
                for db_component in db_comps:
                    order += 1

                    db_pcomp = get_problem_component(db, db_problem, db_component)
                    if db_pcomp is None:
                        db_pcomp = ProblemComponent()
                        db_pcomp.problem = db_problem
                        db_pcomp.component = db_component
                        db_pcomp.order = order
                        db.session.add(db_pcomp)

        self.log_debug("Removing {0} invalid reports from problems"
                       .format(len(invalid_report_ids_to_clean)))
        for report_id in invalid_report_ids_to_clean:
            db_report = get_report_by_id(db, report_id)
            if db_report is not None:
                db_report.problem_id = None
                db.session.add(db_report)

        self.log_debug("Flushing session")
        db.session.flush()