Example #1
    def test_with_changed_uasts_rpc_error(self):
        called = False

        def func(imposter, ptr_from: ReferencePointer,
                 ptr_to: ReferencePointer, data_service: DataService, **data):
            nonlocal called
            called = True

        def fail(f):
            def wrapped():
                f()
                self.assertIsNotNone(
                    self.data_service._data_request_local.channel)
                raise grpc.RpcError()

            return wrapped

        self.data_service._get_channel = fail(self.data_service._get_channel)
        func = with_changed_uasts(unicode=False)(func)

        self.assertRaises(
            grpc.RpcError, func, self,
            ReferencePointer(self.url, self.ref, self.COMMIT_FROM),
            ReferencePointer(self.url, self.ref, self.COMMIT_TO),
            self.data_service)
        self.assertFalse(called)
        self.assertIsNone(self.data_service._data_request_local.channel)
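
The assertions above pin down the cleanup contract: when the gRPC stream raises, the decorator must drop the cached channel before re-raising. A minimal sketch of that behavior, assuming a hypothetical `shutdown()` hook on DataService (the real cleanup call may differ):

import grpc

def with_channel_cleanup(func):
    # Sketch only: re-raise RpcError after resetting the cached channel.
    def wrapped(imposter, ptr_from, ptr_to, data_service, **data):
        try:
            return func(imposter, ptr_from, ptr_to, data_service, **data)
        except grpc.RpcError:
            # hypothetical hook that clears _data_request_local.channel
            data_service.shutdown()
            raise
    return wrapped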
Example #2
    def process_review_event(self, request: ReviewEvent) -> EventResponse:
        base_ptr = ReferencePointer.from_pb(request.commit_revision.base)
        head_ptr = ReferencePointer.from_pb(request.commit_revision.head)
        response = EventResponse()
        response.analyzer_version = self.version
        comments = []
        for analyzer in self._analyzers:
            try:
                mycfg = dict(request.configuration[analyzer.__name__])
            except (KeyError, ValueError):
                mycfg = {}
            model, cache_miss = self._model_repository.get(
                self._model_id(analyzer), analyzer.model_type, base_ptr.url)
            if cache_miss:
                self._log.info("cache miss: %s", analyzer.__name__)
            if model is None:
                self._log.info("training: %s", analyzer.__name__)
                model = analyzer.train(base_ptr, mycfg, self._data_service.get())
                self._model_repository.set(self._model_id(analyzer), base_ptr.url, model)
            self._log.debug("running %s", analyzer.__name__)
            results = analyzer(model, head_ptr.url, mycfg).analyze(
                base_ptr, head_ptr, self._data_service.get())
            self._log.info("%s: %d comments", analyzer.__name__, len(results))
            comments.extend(results)
        response.comments.extend(comments)
        return response
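
The get/train/set sequence inside the loop is the model-cache pattern these handlers share. Isolated as a sketch, using only names that appear in the example:

def get_or_train(self, analyzer, base_ptr, mycfg):
    # Look the model up by model id, model type and repository URL.
    model, _cache_miss = self._model_repository.get(
        self._model_id(analyzer), analyzer.model_type, base_ptr.url)
    if model is None:
        # Train from scratch and persist it so the next event hits the cache.
        model = analyzer.train(base_ptr, mycfg, self._data_service.get())
        self._model_repository.set(self._model_id(analyzer), base_ptr.url, model)
    return model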
Example #3
    def test_with_changed_uasts_and_contents(self):
        def func(imposter, ptr_from: ReferencePointer,
                 ptr_to: ReferencePointer, data_request_stub: DataStub,
                 **data):
            changes = list(data["changes"])
            self.assertEqual(len(changes), 1)
            change = changes[0]
            self.assertEqual(len(change.base.content), 5548)
            self.assertEqual(len(change.head.content), 5542)
            self.assertEqual(
                type(change.base.uast).__module__, bblfsh.Node.__module__)
            self.assertEqual(
                type(change.head.uast).__module__, bblfsh.Node.__module__)
            self.assertEqual(change.base.path, change.head.path)
            self.assertEqual(change.base.path, "lookout/core/manager.py")
            self.assertEqual(change.base.language, "Python")
            self.assertEqual(change.head.language, "Python")

        func = with_changed_uasts_and_contents(func)
        func(
            self,
            ReferencePointer(self.url, self.ref,
                             "4984b98b0e2375e9372fbab4eb4c9cd8f0c289c6"),
            ReferencePointer(self.url, self.ref,
                             "5833b4ba94154cf1ed07f37c32928c7b4411b36b"),
            self.data_service.get())
Example #4
    @classmethod
    def setUpClass(cls):
        logging.basicConfig(level=logging.INFO)
        logging.getLogger("IdTyposAnalyzer").setLevel(logging.DEBUG)
        base = Path(__file__).parent
        cls.bblfsh_client = bblfsh.BblfshClient("0.0.0.0:9432")
        # str() is needed for Python 3.5
        with lzma.open(str(base / "test_base_file.js.xz")) as fin:
            contents = fin.read()
            uast = cls.bblfsh_client.parse("test_base_file.js",
                                           contents=contents).uast
            cls.base_files = [
                FakeFile(path="test_base_file.js",
                         content=contents,
                         uast=uast,
                         language="Javascript")
            ]
        with lzma.open(str(base / "test_head_file.js.xz")) as fin:
            contents = fin.read()
            uast = cls.bblfsh_client.parse("test_head_file.js",
                                           contents=contents).uast
            cls.head_files = [
                FakeFile(path="test_head_file.js",
                         content=contents,
                         uast=uast,
                         language="Javascript")
            ]
        cls.ptr = ReferencePointer("someurl", "someref", "somecommit")
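
FakeFile is presumably a lightweight stand-in for the protobuf File message used in Example 18; a minimal sketch, assuming only the four fields the fixture sets:

from collections import namedtuple

# Hypothetical definition: the tests only need these four attributes.
FakeFile = namedtuple("FakeFile", ("path", "content", "uast", "language"))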
Example #5
    def process_push_event(self,
                           request: PushEvent) -> EventResponse:  # noqa: D401
        """
        Callback for push events invoked by EventListener.
        """
        ptr = ReferencePointer.from_pb(request.commit_revision.head)
        data_service = self._data_service
        for analyzer in self._analyzers:
            if analyzer.model_type == DummyAnalyzerModel:
                continue
            try:
                mycfg = self._protobuf_struct_to_dict(
                    request.configuration[analyzer.name])
            except (KeyError, ValueError):
                mycfg = {}
            model = self._get_model(analyzer, ptr.url)
            if model is not None:
                must_train = analyzer.check_training_required(
                    model, ptr, mycfg, data_service)
                if not must_train:
                    self._log.info("skipped training %s", analyzer.name)
                    continue
            self._log.debug("training %s", analyzer.name)
            record_event("%s.train" % analyzer.name, 1)
            model = analyzer.train(ptr, mycfg, data_service)
            self._model_repository.set(self._model_id(analyzer), ptr.url,
                                       model)
        response = EventResponse()
        response.analyzer_version = self.version
        return response
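
check_training_required lets an analyzer veto retraining on every push. A hedged sketch of an override, assuming the trained model records its ReferencePointer in model.ptr (as Example 13 suggests):

class CommitAwareAnalyzer(Analyzer):  # illustrative subclass, not a real one
    @classmethod
    def check_training_required(cls, old_model, ptr, config, data_service):
        # Retrain only when the head commit moved past the one we trained on.
        return old_model.ptr.commit != ptr.commit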
Example #6
def request_changes(stub: DataStub, ptr_from: ReferencePointer, ptr_to: ReferencePointer,
                    contents: bool, uast: bool, unicode: bool) -> Iterator[Change]:
    """
    Invoke GRPC API and get the changes. Used by `with_changed_uasts()` and Review events.

    :return: The stream of the gRPC invocation results. In theory, `.result()` would turn this \
             into a synchronous call, but in practice, that function call hangs for some reason.
    """
    request = ChangesRequest(base=ptr_from.to_pb(), head=ptr_to.to_pb())
    request.exclude_pattern = GARBAGE_PATTERN
    request.exclude_vendored = True
    request.want_contents = contents
    request.want_language = contents or uast
    request.want_uast = uast
    changes = stub.GetChanges(request)
    if unicode:
        changes = map(BytesToUnicodeConverter.convert_change, changes)
    return changes
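
A usage sketch for request_changes; the stub construction is assumed, everything else follows from the signature above:

# Assumed setup: `stub` is a DataStub connected to the lookout data server.
ptr_from = ReferencePointer("someurl", "someref", "base_commit")
ptr_to = ReferencePointer("someurl", "someref", "head_commit")
for change in request_changes(stub, ptr_from, ptr_to,
                              contents=True, uast=True, unicode=False):
    print(change.base.path, len(change.base.content), change.head.language)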
Example #7
    def test_with_changed_contents(self):
        def func(imposter, ptr_from: ReferencePointer,
                 ptr_to: ReferencePointer, data_service: DataService, **data):
            self.assertIsInstance(data_service, DataService)
            changes = list(data["changes"])
            self.assertEqual(len(changes), 1)
            change = changes[0]
            self.assertEqual(len(change.base.content), 5548)
            self.assertEqual(len(change.head.content), 5542)
            self.assertFalse(change.base.uast.children)
            self.assertFalse(change.head.uast.children)
            self.assertEqual(change.base.path, change.head.path)
            self.assertEqual(change.base.path, "lookout/core/manager.py")
            self.assertEqual(change.base.language, "Python")
            self.assertEqual(change.head.language, "Python")

        func = with_changed_contents(unicode=False)(func)
        func(self, ReferencePointer(self.url, self.ref, self.COMMIT_FROM),
             ReferencePointer(self.url, self.ref, self.COMMIT_TO),
             self.data_service)
Example #8
    @classmethod
    def setUpClass(cls):
        logging.basicConfig(level=logging.INFO)
        logging.getLogger("FormatAnalyzer").setLevel(logging.DEBUG)
        base = Path(__file__).parent
        # str() is needed for Python 3.5
        with lzma.open(str(base / "benchmark.uast.xz")) as fin:
            cls.uast = bblfsh.Node.FromString(fin.read())
        cls.base_files = cls.get_files_from_tar(str(base / "freecodecamp-base.tar.xz"))
        cls.head_files = cls.get_files_from_tar(str(base / "freecodecamp-head.tar.xz"))
        cls.ptr = ReferencePointer("someurl", "someref", "somecommit")
        FeatureExtractor._log.level = logging.DEBUG
        cls.bblfsh_client = bblfsh.BblfshClient("0.0.0.0:9432")
Example #9
    def test_with_changed_uasts_unicode(self):
        def func(imposter, ptr_from: ReferencePointer,
                 ptr_to: ReferencePointer, data_service: DataService, **data):
            self.assertIsInstance(data_service, DataService)
            changes = list(data["changes"])
            self.assertEqual(len(changes), 1)
            change = changes[0]
            self.assertEqual(change.base.content, "")
            self.assertEqual(change.head.content, "")
            self.assertEqual(
                type(change.base.uast).__module__, bblfsh.Node.__module__)
            self.assertEqual(
                type(change.head.uast).__module__, bblfsh.Node.__module__)
            self.assertEqual(change.base.path, change.head.path)
            self.assertEqual(change.base.path, "lookout/core/manager.py")
            self.assertEqual(change.base.language, "Python")
            self.assertEqual(change.head.language, "Python")

        func = with_changed_uasts(unicode=True)(func)
        func(self, ReferencePointer(self.url, self.ref, self.COMMIT_FROM),
             ReferencePointer(self.url, self.ref, self.COMMIT_TO),
             self.data_service)
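
With unicode=True the contents arrive as str instead of bytes, which is why this test compares against "" while Example 7 checks byte lengths. A plausible simplification of the conversion (the real BytesToUnicodeConverter.convert_change may well differ):

def convert_change(change):
    # Hypothetical sketch: decode both sides of the change in place.
    change.base.content = change.base.content.decode("utf-8", errors="replace")
    change.head.content = change.head.content.decode("utf-8", errors="replace")
    return change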
Example #10
    def process_push_event(self, request: PushEvent) -> EventResponse:
        ptr = ReferencePointer.from_pb(request.commit_revision.head)
        for analyzer in self._analyzers:
            self._log.debug("training %s", analyzer.__name__)
            try:
                mycfg = dict(request.configuration[analyzer.__name__])
            except (KeyError, ValueError):
                mycfg = {}
            model = analyzer.train(ptr, mycfg, self._data_service.get())
            self._model_repository.set(self._model_id(analyzer), ptr.url, model)
        response = EventResponse()
        response.analyzer_version = self.version
        return response
Example #11
    def process_review_event(
            self, request: ReviewEvent) -> EventResponse:  # noqa: D401
        """
        Callback for review events invoked by EventListener.
        """
        base_ptr = ReferencePointer.from_pb(request.commit_revision.base)
        head_ptr = ReferencePointer.from_pb(request.commit_revision.head)
        response = EventResponse()
        response.analyzer_version = self.version
        comments = []
        for analyzer in self._analyzers:
            try:
                mycfg = self._protobuf_struct_to_dict(
                    request.configuration[analyzer.name])
                self._log.info("%s config: %s", analyzer.name, mycfg)
            except (KeyError, ValueError):
                mycfg = {}
                self._log.debug("no config was provided for %s", analyzer.name)
            if analyzer.model_type != DummyAnalyzerModel:
                model = self._get_model(analyzer, base_ptr.url)
                if model is None:
                    self._log.info("training: %s", analyzer.name)
                    record_event("%s.train" % analyzer.name, 1)
                    model = analyzer.train(base_ptr, mycfg, self._data_service)
                    self._model_repository.set(self._model_id(analyzer),
                                               base_ptr.url, model)
            else:
                model = DummyAnalyzerModel()
            self._log.debug("running %s", analyzer.name)
            record_event("%s.analyze" % analyzer.name, 1)
            results = analyzer(model, head_ptr.url,
                               mycfg).analyze(base_ptr, head_ptr,
                                              self._data_service)
            self._log.info("%s: %d comments", analyzer.name, len(results))
            record_event("%s.comments" % analyzer.name, len(results))
            comments.extend(results)
        response.comments.extend(comments)
        return response
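
_protobuf_struct_to_dict converts the per-analyzer google.protobuf.Struct configuration into a plain dict. One plausible implementation on top of the stock protobuf helper:

from google.protobuf.json_format import MessageToDict

def _protobuf_struct_to_dict(self, struct):
    # MessageToDict handles nested Structs, ListValues and scalar Values.
    return MessageToDict(struct)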
Example #12
def request_files(stub: DataStub, ptr: ReferencePointer, contents: bool, uast: bool,
                  unicode: bool) -> Iterator[File]:
    """
    Invoke GRPC API and get the files. Used by `with_uasts()` and Push events.

    :return: The stream of the gRPC invocation results.
    """
    request = FilesRequest(revision=ptr.to_pb())
    request.exclude_pattern = GARBAGE_PATTERN
    request.exclude_vendored = True
    request.want_contents = contents
    request.want_language = contents or uast
    request.want_uast = uast
    files = stub.GetFiles(request)
    if unicode:
        files = map(BytesToUnicodeConverter.convert_file, files)
    return files
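
A matching usage sketch for request_files, mirroring the request_changes one in Example 6 (the stub is again assumed):

# Assumed setup: `stub` is a DataStub for the data server.
ptr = ReferencePointer("someurl", "someref", "head_commit")
for file in request_files(stub, ptr, contents=False, uast=True, unicode=False):
    print(file.path, file.language)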
Example #13
    def test_dummy_model(self):
        ptr = ReferencePointer("1", "2", "3")
        model = DummyAnalyzerModel.generate(FakeAnalyzer, ptr)
        self.assertEqual(model.name, FakeAnalyzer.name)
        self.assertEqual(model.version, [FakeAnalyzer.version])
        self.assertEqual(model.ptr, ptr)
        self.assertEqual(model.vendor, "source{d}")
        self.assertEqual(model.description,
                         "Model bound to fake Lookout analyzer.")
        buffer = io.BytesIO()
        model.save(buffer)
        buffer.seek(0)
        model2 = model.load(buffer)
        self.assertEqual(model.ptr, model2.ptr)
        self.assertEqual(model.name, model2.name)
        self.assertEqual(model.description, model2.description)
        self.assertEqual(model.vendor, model2.vendor)
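
The save/load round trip above needs nothing more than a seekable binary buffer, so the pattern extracts cleanly into a helper:

import io

def roundtrip(model):
    # Serialize to an in-memory buffer, rewind, and load a fresh copy back.
    buffer = io.BytesIO()
    model.save(buffer)
    buffer.seek(0)
    return model.load(buffer)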
Example #14
    def test_with_uasts(self):
        def func(imposter, ptr: ReferencePointer, config: dict,
                 data_service: DataService, **data):
            self.assertIsInstance(data_service, DataService)
            files = list(data["files"])
            self.assertEqual(len(files), 18)
            for file in files:
                self.assertEqual(file.content, b"")
                self.assertEqual(
                    type(file.uast).__module__, bblfsh.Node.__module__)
                self.assertTrue(file.path)
                self.assertIn(file.language,
                              ("Python", "YAML", "Dockerfile", "Markdown",
                               "Jupyter Notebook", "Shell", "Text", ""))

        func = with_uasts(unicode=False)(func)
        func(self, ReferencePointer(self.url, self.ref, self.COMMIT_TO), None,
             self.data_service)
Example #15
    def test_with_uasts(self):
        def func(imposter, ptr: ReferencePointer, config: dict,
                 data_request_stub: DataStub, **data):
            files = list(data["files"])
            self.assertEqual(len(files), 61)
            for file in files:
                self.assertEqual(file.content, b"")
                self.assertEqual(
                    type(file.uast).__module__, bblfsh.Node.__module__)
                self.assertTrue(file.path)
                self.assertIn(file.language,
                              ("Python", "YAML", "Dockerfile", "Markdown",
                               "Jupyter Notebook", "Shell", "Text", ""))

        func = with_uasts(func)
        func(
            self,
            ReferencePointer(self.url, self.ref,
                             "5833b4ba94154cf1ed07f37c32928c7b4411b36b"), None,
            self.data_service.get())
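
Examples 14 and 15 exercise two generations of the same decorator. Side by side (handlers elided):

# Older API: with_uasts wraps the handler directly and the handler
# receives a raw DataStub.
func = with_uasts(func)
func(self, ptr, None, self.data_service.get())

# Newer API: with_uasts is parametrized and the handler receives
# the DataService itself.
func = with_uasts(unicode=False)(func)
func(self, ptr, None, self.data_service)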
Example #16
    def test_with_contents(self):
        def func(imposter, ptr: ReferencePointer, config: dict,
                 data_service: DataService, **data):
            self.assertIsInstance(data_service, DataService)
            files = list(data["files"])
            self.assertEqual(len(files), 18)
            non_empty_langs = 0
            for file in files:
                if not file.path.endswith("__init__.py"):
                    self.assertGreater(len(file.content), 0, file.path)
                self.assertFalse(file.uast.children)
                self.assertTrue(file.path)
                if file.language:
                    non_empty_langs += 1
                self.assertIn(file.language,
                              ("Python", "YAML", "Dockerfile", "Markdown",
                               "Jupyter Notebook", "Shell", "Text", ""))
            self.assertGreater(non_empty_langs, 0)

        func = with_contents(unicode=False)(func)
        func(self, ReferencePointer(self.url, self.ref, self.COMMIT_TO), None,
             self.data_service)
Example #17
def main():
    setup("DEBUG", False)
    parser = ArgumentParser()
    parser.add_argument(
        "training_dir",
        help="Path to the directory containing the files to train from.")
    parser.add_argument("output_path", help="Path to the model to write.")
    parser.add_argument("--bblfsh",
                        default="0.0.0.0:9432",
                        help="Address of babelfish server.")
    parser.add_argument("--language",
                        default="javascript",
                        help="Language to filter on.")
    parser.add_argument(
        "--config",
        help="Path to a YAML file containing config to apply during training.")
    args = parser.parse_args()

    kwargs = vars(args)
    kwargs["ref"] = ReferencePointer(kwargs["training_dir"], "HEAD",
                                     "<unknown>")
    train(**kwargs)
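
The parser maps one-to-one onto the keyword arguments of train(); an equivalent programmatic call, assuming the train() variant that accepts ref (as called here and in Example 20) and with placeholder paths:

train(training_dir="/path/to/corpus",
      output_path="/path/to/model.asdf",
      bblfsh="0.0.0.0:9432",
      language="javascript",
      config=None,
      ref=ReferencePointer("/path/to/corpus", "HEAD", "<unknown>"))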
Example #18
    @classmethod
    def setUpClass(cls):
        logging.basicConfig(level=logging.INFO)
        logging.getLogger("IdTyposAnalyzer").setLevel(logging.DEBUG)
        base = Path(__file__).parent
        cls.bblfsh_client = bblfsh.BblfshClient("0.0.0.0:9432")
        # str() is needed for Python 3.5
        with lzma.open(str(base / "test_base_file.js.xz")) as fin:
            contents = fin.read()
            uast = cls.bblfsh_client.parse("test_base_file.js",
                                           contents=contents).uast
            cls.base_files = [
                File(path="test_file.js",
                     content=contents,
                     uast=uast,
                     language="Javascript")
            ]
        with lzma.open(str(base / "test_head_file.js.xz")) as fin:
            contents = b"var print_tipe = 0;\n" + fin.read()
            uast = cls.bblfsh_client.parse("test_head_file.js",
                                           contents=contents).uast
            cls.head_files = [
                File(path="test_file.js",
                     content=contents,
                     uast=uast,
                     language="Javascript")
            ]
        cls.ptr = ReferencePointer("someurl", "someref", "somecommit")
        cls.config = {
            "model": MODEL_PATH,
            "confidence_threshold": 0.0,
            "n_candidates": 3,
            "check_all_identifiers": True,
            "analyze": {
                "filepath": cls.base_files[0].path,
                "wrong_id": "print_tipe",
                "line": 0
            }
        }
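
The head file is the base file with `var print_tipe = 0;` prepended, planting a known typo for the analyzer to flag, and the `analyze` section of the config pins the expected finding. A hypothetical end-to-end check built on this fixture (the call shapes follow Example 2; `data_service` and the comment's `text` field are assumptions):

model = IdTyposAnalyzer.train(cls.ptr, cls.config, data_service)
comments = IdTyposAnalyzer(model, cls.ptr.url, cls.config).analyze(
    cls.ptr, cls.ptr, data_service)
assert any("print_tipe" in comment.text for comment in comments)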
Example #19
def train(training_dir: str, output_path: str, language: str, bblfsh: str, config: str
          ) -> None:
    """
    Train a FormatModel for debugging purposes.

    :param training_dir: Path to the directory containing the files to train from.
    :param output_path: Path to the model to write.
    :param language: Language to filter on.
    :param bblfsh: Address of the babelfish server.
    :param config: Path to a YAML config to use during the training.
    """
    bblfsh_client = BblfshClient(bblfsh)
    if config is not None:
        with open(config) as fh:
            config = safe_load(fh)
    else:
        config = {}
    filenames = glob.glob(join(training_dir, "**", "*"), recursive=True)
    model = FormatAnalyzer.train(
        ReferencePointer("someurl", "someref", "somecommit"),
        config,
        FakeDataService(bblfsh_client, prepare_files(filenames, bblfsh_client, language), None)
    )
    model.save(output_path)
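
FakeDataService stands in for the real data server: it only has to hand train() a Babelfish client and the pre-parsed files. A plausible shape, with guessed field names:

from collections import namedtuple

# Hypothetical stand-in; the real class likely exposes accessor methods
# rather than bare fields.
FakeDataService = namedtuple("FakeDataService",
                             ("bblfsh_client", "files", "changes"))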
Example #20
def quality_report_noisy(bblfsh: str,
                         language: str,
                         confidence_threshold: float,
                         support_threshold: int,
                         precision_threshold: float,
                         dir_output: str,
                         config: Optional[dict] = None,
                         repos: Optional[str] = None) -> None:
    """
    Generate a quality report on the artificial noisy dataset including evaluation curves.

    :param bblfsh: Babelfish client. Babelfish server should be started accordingly.
    :param language: Language to consider, others will be discarded.
    :param confidence_threshold: Confidence threshold to filter relevant rules.
    :param support_threshold: Support threshold to filter relevant rules.
    :param precision_threshold: Precision threshold tolerated by the model. \
           Limit drawn as a red horizontal line on the figure.
    :param dir_output: Path to the output directory where to store the quality report in Markdown \
           and the precision-recall curve in png format.
    :param config: FormatAnalyzer config to use. Default one is used if not set.
    :param repos: Input list of urls to the repositories to analyze. \
           Should be strings separated by newlines. If it is None, \
           we use the string defined at the beginning of the file.
    """
    log = logging.getLogger("quality_report_noisy")

    # initialization
    repo_names = []
    last_accepted_rule = {}
    prediction_rates, precisions, accepted_rules = (defaultdict(list)
                                                    for _ in range(3))
    n_mistakes, prec_max_prediction_rate, confidence_threshold_exp, max_prediction_rate, \
        n_rules, n_rules_filtered = ({} for _ in range(6))
    if repos is None:
        repos = REPOSITORIES
    try:
        # fetch the original and noisy repositories
        client = BblfshClient(bblfsh)
        log.info("Repositories: %s", repos)
        with tempfile.TemporaryDirectory() as tmpdirname:
            for raw in repos.splitlines():
                repo_path, clean_commit, noisy_commit = raw.split(",")
                repo = repo_path.split("/")[-1]
                log.info("Fetching %s", repo_path)
                git_dir = os.path.join(tmpdirname, repo)
                git_dir_noisy = os.path.join(tmpdirname, repo + "_noisy")
                cmd1 = "git clone --single-branch --branch master %s %s" % (
                    repo_path, git_dir)
                cmd2 = "git clone --single-branch --branch style-noise-1-per-file %s %s" \
                    % (repo_path, git_dir_noisy)
                try:
                    for cmd in (cmd1, cmd2):
                        log.debug("Running: %s", cmd)
                        subprocess.check_call(cmd.split())
                except subprocess.CalledProcessError as e:
                    raise ConnectionError("Unable to fetch repository %s" %
                                          repo_path) from e

                # train the model on the original repository
                ref = ReferencePointer(repo_path, "HEAD", clean_commit)
                model_path = os.path.join(git_dir, "model.asdf")
                format_model = train(training_dir=git_dir,
                                     ref=ref,
                                     output_path=model_path,
                                     language=language,
                                     bblfsh=bblfsh,
                                     config=config,
                                     log=log)
                rules = format_model[language]

                # extract the raw data and the diff from the repositories
                input_pattern = os.path.join(git_dir, "**", "*.js")
                input_pattern_noisy = os.path.join(git_dir_noisy, "**", "*.js")
                true_content = get_content_from_repo(input_pattern)
                noisy_content = get_content_from_repo(input_pattern_noisy)
                true_files, noisy_files, start_changes = get_difflib_changes(
                    true_content, noisy_content)
                if not true_files:
                    raise ValueError(
                        "Noisy repo should contain at least one artificial mistake")
                log.info(
                    "Number of files modified by adding style noise: %d / %d",
                    len(true_files), len(true_content))
                del true_content, noisy_content

                # extract the features
                feature_extractor = FeatureExtractor(
                    language=language,
                    **rules.origin_config["feature_extractor"])
                vnodes_y_true = files2vnodes(true_files, feature_extractor,
                                             rules, client)
                mispreds_noise = files2mispreds(noisy_files, feature_extractor,
                                                rules, client, log)

                # compute the prediction rate and precision score on the artificial noisy dataset
                diff_mispreds = get_diff_mispreds(mispreds_noise,
                                                  start_changes)
                changes_count = len(start_changes)
                n_rules[repo] = len(rules.rules)
                rules_id = [(i, r.stats.conf)
                            for i, r in enumerate(rules.rules)
                            if r.stats.conf > confidence_threshold
                            and r.stats.support > support_threshold]
                rules_id = sorted(rules_id, key=lambda k: k[1], reverse=True)
                for i in range(len(rules_id)):
                    filtered_mispreds = {
                        k: m
                        for k, m in diff_mispreds.items()
                        if any(r[0] == m.rule for r in rules_id[:i + 1])
                    }
                    style_fixes = get_style_fixes(filtered_mispreds,
                                                  vnodes_y_true, true_files,
                                                  noisy_files,
                                                  feature_extractor)
                    prediction_rate, precision = compute_metrics(
                        changes_count=changes_count,
                        predictions_count=len(filtered_mispreds),
                        true_positive=len(style_fixes))
                    prediction_rates[repo].append(round(prediction_rate, 3))
                    precisions[repo].append(round(precision, 3))
                print("prediction rate x:", prediction_rates[repo])
                print("precision y:", precisions[repo])

                # compute other statistics and quality metrics for the model's evaluation
                repo_names.append(repo)
                n_mistakes[repo] = len(true_files)
                prec_max_prediction_rate[repo] = precisions[repo][-1]
                max_prediction_rate[repo] = max(prediction_rates[repo])
                n_rules_filtered[repo] = len(rules_id)

                # compute the confidence and prediction rate limit for a given precision threshold
                for i, (prediction_rate, prec) in enumerate(
                        zip(prediction_rates[repo], precisions[repo])):
                    if prec >= precision_threshold:
                        accepted_rules[repo].append(
                            (i, rules_id[i][1], prediction_rate))
                last_accepted_rule[repo] = min(accepted_rules[repo],
                                               key=itemgetter(1))
                confidence_threshold_exp[repo] = (last_accepted_rule[repo][0],
                                                  last_accepted_rule[repo][1])
    finally:
        client._channel.close()

    # compute the index of the last accepted rule according to the maximum confidence threshold
    limit_conf_id = {}
    max_confidence_threshold_exp = max(confidence_threshold_exp.values(),
                                       key=itemgetter(1))
    for repo, rules in accepted_rules.items():
        for rule in rules:
            if rule[1] < max_confidence_threshold_exp[1]:
                break
            limit_conf_id[repo] = rule[0]

    # compile the curves showing the evolutions of the prediction rate and precision score
    path_to_figure = os.path.join(dir_output, "pr_curves.png")
    plot_curve(repo_names, prediction_rates, precisions, precision_threshold,
               limit_conf_id, path_to_figure)

    # compile the markdown template for the report through jinja2
    loader = jinja2.FileSystemLoader(
        (os.path.join(os.path.dirname(__file__), "..", "templates"), ),
        followlinks=True)
    env = jinja2.Environment(trim_blocks=True,
                             lstrip_blocks=True,
                             keep_trailing_newline=True)
    env.globals.update(range=range)
    template = loader.load(env, "noisy_quality_report.md.jinja2")
    report = template.render(repos=repo_names,
                             n_mistakes=n_mistakes,
                             prec_max_prediction_rate=prec_max_prediction_rate,
                             confidence_threshold_exp=round(
                                 max_confidence_threshold_exp[1], 2),
                             max_prediction_rate=max_prediction_rate,
                             confidence_threshold=confidence_threshold,
                             support_threshold=support_threshold,
                             n_rules=n_rules,
                             n_rules_filtered=n_rules_filtered,
                             path_to_figure=path_to_figure)

    # write the quality report
    repo_pathrt = os.path.join(dir_output, "report_noise.md")
    with open(repo_pathrt, "w", encoding="utf-8") as f:
        f.write(report)
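
compute_metrics reduces to two ratios; a sketch consistent with how it is called above (the exact formulas are assumptions):

def compute_metrics(changes_count, predictions_count, true_positive):
    # Assumed definitions: prediction rate is the share of injected changes
    # that received a prediction; precision is the share of predictions
    # that were correct.
    prediction_rate = predictions_count / changes_count if changes_count else 0.0
    precision = true_positive / predictions_count if predictions_count else 0.0
    return prediction_rate, precision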