def test_single_repo_oxygen(self):
        """Check that the runner with OXYGEN matches a direct oxygen() call.

        Both are executed on ``self.modules1`` and must serialize to the
        same JSON.
        """
        via_direct_call = oxygen(self.modules1)
        via_runner = run_single_repo(self.modules1, OXYGEN)

        # Results are compared through their JSON form because the
        # equality operator is not overloaded (yet).
        direct_json = via_direct_call.json()
        runner_json = via_runner.json()

        assert direct_json == runner_json
    def _func_def_generic(self, second, algorithm, single):
        """Run `algorithm` against the reference modules and `second`.

        When `single` is truthy, ``self.reference + second`` is analyzed
        as one repository; otherwise ``self.reference`` and `second` are
        analyzed as two repositories.

        Returns the result after asserting that it is a DetectionResult.
        """
        if single:
            detection = run_single_repo(self.reference + second, algorithm)
        else:
            detection = run_two_repos(self.reference, second, algorithm)

        assert detection is not None
        assert isinstance(detection, DetectionResult)

        return detection
Пример #3
0
def _repo_dir_name(path):
    """Return the last component of a "/"-separated repository path.

    Trailing slashes are ignored, so "a/b/" and "a/b" both yield "b".
    """
    return path.rstrip("/").rsplit("/", 1)[-1]


def main():
    """Entry point of the application.

    Parses the command-line arguments, runs a single-repository analysis
    or, when a second repository was given, a two-repository analysis,
    and writes the result's JSON to a file named
    ``clones_<algorithm>_<repo>[_<repo>]_<timestamp>.json``.

    A UserInputError raised along the way is logged (if it carries a
    message) and the process exits with the error's code.
    """
    try:
        # Parse command start arguments
        repos, algorithm = handle_cli_args()

        time_snap("Arguments handled; Repositories parsed")

        module_list_1 = get_modules_from_dir(repos[0])
        time_snap("First repository modules parsed")

        if not module_list_1:
            raise UserInputError(f"First repository is empty: \"{repos[0]}\"")

        if repos[1]:
            module_list_2 = get_modules_from_dir(repos[1])
            time_snap("Second repository modules parsed")

            if not module_list_2:
                raise UserInputError(
                    f"Second repository is empty: \"{repos[1]}\"")

            log.info("Running a two-repository analysis...")
            result = run_two_repos(module_list_1, module_list_2, algorithm)

        else:
            log.info("Running a single-repository analysis...")
            result = run_single_repo(module_list_1, algorithm)

        time_snap("Analysis completed")

        # Save detection result to a JSON file.

        # The helper strips trailing slashes first, so "path/to/repo/"
        # contributes "repo" to the filename instead of an empty string.
        name_parts = ["clones", algorithm, _repo_dir_name(repos[0])]
        if repos[1]:
            name_parts.append(_repo_dir_name(repos[1]))
        name_parts.append(datetime.now().strftime("%Y-%m-%d_%H-%M-%S"))
        json_filename = "_".join(name_parts) + ".json"

        # Explicit UTF-8 so the output does not depend on the platform's
        # default encoding.
        with open(json_filename, "w", encoding="utf-8") as f:
            f.write(result.json())

    except UserInputError as ex:
        if ex.message:
            log.error(ex.message)

        sys.exit(ex.code)
Пример #4
0
def hello():
    """Return the index page with the ``#LOG#`` placeholder filled in.

    The page template is read from ``_INDEX_HTML``. If the request carries
    a ``repo`` argument, that repository is analyzed with the "oxygen"
    algorithm and the detected clones are rendered as a nested HTML list;
    if the analysis raises UserInputError, its message is shown instead.
    Without a ``repo`` argument the placeholder is replaced by an empty
    string.
    """
    # Local import so the block does not depend on a file-level import.
    from html import escape

    with open(_INDEX_HTML, "r", encoding="utf-8") as f:
        webpage = f.read()

    output = ""

    repo = request.args.get("repo")
    if repo:
        try:
            modules = get_modules_from_repo(repo)
            result = run_single_repo(modules, "oxygen")

            # Clone values and origins come from the analyzed repository,
            # which is chosen by the requester, so every piece of text is
            # HTML-escaped before being placed into the page.
            clusters = []
            for c in result.clones:
                origins = "".join(
                    "<li>" + escape(orig) +
                    f" - Similarity: {sim * 100:g} %" + "</li>"
                    for orig, sim in c.origins.items())

                clusters.append(
                    "<li>" + escape(c.value) +
                    " - Weight: " + escape(f"{c.match_weight}") +
                    "<ul>" + origins + "</ul></li>")

            output = "<ol>" + "".join(clusters) + "</ol>"

        except UserInputError as ex:
            # The message may be empty (str.replace below would fail on a
            # non-string) and may echo request-supplied text, so fall back
            # to an empty string and escape it.
            output = escape(ex.message) if ex.message else ""

    return webpage.replace("#LOG#", output)
Пример #5
0
def analyze_repo(repo_info, repo_id, algorithm=OXYGEN):
    """Analyze the repo using the specified algorithm. Store results in db.

    Steps:
      1. Clone or pull the repository; on failure the repo's status is set
         to 'err_clone' and the function returns.
      2. Parse the modules from ``repo_info.dir``; if there are none, a
         warning is logged and the function returns.
      3. Run the single-repository analysis and, in one transaction,
         insert the commit, its clusters and their origins, then set the
         repo's status to 'done'.
      4. In a second transaction, run pattern extraction.

    A PG_Error raised while the connection is open is passed to
    ``handle_pg_error``. The connection is always closed before returning.
    An error raised while opening the connection propagates to the caller.
    """
    log.info(f"Analyzing repository: {repo_info}")

    # Open the connection before entering the try block. If it were opened
    # inside and failed, `conn` would be unbound in both the except handler
    # and the finally clause, replacing the real error with an
    # UnboundLocalError.
    conn = pg_conn(db_url)

    try:
        if repo_info.clone_or_pull():
            log.success(
                f"Repository has been successfully cloned: {repo_info}")

        else:
            log.warning(f"Unable to clone repository: {repo_info}")

            conn.run(
                """UPDATE repos SET status = (SELECT id FROM states WHERE name = 'err_clone') WHERE id = %s;""",
                repo_id)

            return

        modules = get_modules_from_dir(repo_info.dir)

        if not modules:
            log.warning("Repository contains no Python module")
            return

        result = run_single_repo(modules, algorithm)

        # Insert repository analysis into database all at once
        with conn.transaction():
            commit_id = conn.one(
                """INSERT INTO commits (repo_id, hash) VALUES (%s, %s) RETURNING id;""",
                repo_id, repo_info.hash)

            for c in result.clones:
                cluster_id = conn.one(
                    """INSERT INTO clusters (commit_id, "value", weight) VALUES (%s, %s, %s) RETURNING id;""",
                    commit_id, c.value, c.match_weight)

                # Each origin maps to its similarity value.
                for o, s in c.origins.items():
                    conn.run(
                        """INSERT INTO origins (cluster_id, file, line, col_offset, similarity) VALUES (%s, %s, %s, %s, %s);""",
                        cluster_id, o.file, o.line, o.col_offset, s)

            log.success(
                f"Repository has been successfully analyzed: {repo_info}")

            conn.run(
                """UPDATE repos SET status = (SELECT id FROM states WHERE name = 'done') WHERE id = %s;""",
                repo_id)

        # Once done with the regular analysis, run pattern extraction
        with conn.transaction():
            _extract_patterns(conn, commit_id, modules)

        log.success(f"Pattern extraction from was successful: {repo_info}")

    except PG_Error as ex:
        handle_pg_error(ex, conn, repo_id)

    finally:
        conn.close()
 def test_single_repo_iodine(self):
     """Running IODINE through the single-repo runner must be rejected.

     The original author notes that this mode is not implemented yet, so
     a UserInputError is expected.
     """
     expected_error = UserInputError

     with raises(expected_error):
         run_single_repo(self.modules1, IODINE)
    def test_single_repo_chlorine(self):
        """Check that the runner with CHLORINE matches a direct call.

        ``chlorine_single_repo`` and ``run_single_repo`` are both executed
        on ``self.modules1`` and must serialize to the same JSON.
        """
        via_direct_call = chlorine_single_repo(self.modules1)
        via_runner = run_single_repo(self.modules1, CHLORINE)

        direct_json = via_direct_call.json()
        runner_json = via_runner.json()

        assert direct_json == runner_json