Exemplo n.º 1
0
def test_simple_test_1():
    """Check two-repo Chlorine output for the "simple_test_1" fixture.

    Loads the expected result from ``result.json`` in the fixture directory,
    runs ``chlorine_two_repos`` on the modules parsed from ``source1`` and
    ``source2``, and compares the JSON-decoded outputs for equality.
    """
    test_path = path_join(chlorine_double_test_dir, "simple_test_1")

    # Use a context manager so the fixture file is closed even when
    # JSON decoding fails (the handle used to be left open).
    with open(path_join(test_path, "result.json")) as expected_file:
        expected = json.load(expected_file)

    modules1 = get_modules_from_dir(path_join(test_path, "source1"))
    modules2 = get_modules_from_dir(path_join(test_path, "source2"))
    actual = json.loads(chlorine_two_repos(modules1, modules2).json())

    assert expected == actual
    def setUp(self):
        """Parse every "func_def" fixture variant into a module list.

        Stores the parsed modules of the ``reference``, ``different``,
        ``type1`` and ``type2`` sub-directories on the instance under
        attributes of the same names.
        """
        func_def_dir = path_join(test_data_dir, "func_def")

        def load_variant(variant):
            # Each variant lives in its own sub-directory of "func_def".
            return get_modules_from_dir(path_join(func_def_dir, variant))

        self.reference = load_variant("reference")
        self.different = load_variant("different")
        self.type1 = load_variant("type1")
        self.type2 = load_variant("type2")
Exemplo n.º 3
0
def test_chlorine_two_not_none():
    """Two-repo Chlorine run on both test repositories must report clones."""
    first_modules = get_modules_from_dir(test_repo1_dir)
    assert first_modules

    second_modules = get_modules_from_dir(test_repo2_dir)
    assert second_modules

    # Both the result object and its clone collection have to be truthy.
    detection = chlorine_two_repos(first_modules, second_modules)
    assert detection
    assert detection.clones
Exemplo n.º 4
0
def main():
    """Entry point of the application.

    Parses the command-line arguments, loads the modules of the first
    repository and, when a second repository is given, of that one as well.
    Runs the selected algorithm in single- or two-repository mode and writes
    the result as JSON to a file named
    ``clones_<algorithm>_<repo>[_<repo2>]_<timestamp>.json``.

    On ``UserInputError`` the error message (if any) is logged and the
    process exits with the error's code.
    """
    import os  # local import keeps this block self-contained

    def repo_name(path):
        """Return the last component of *path*, ignoring trailing separators."""
        # normpath strips trailing separators and uses the platform's
        # separator rules, so "a/b/" yields "b" instead of an empty name.
        return os.path.basename(os.path.normpath(path))

    try:
        # Parse command start arguments
        repos, algorithm = handle_cli_args()

        time_snap("Arguments handled; Repositories parsed")

        module_list_1 = get_modules_from_dir(repos[0])
        time_snap("First repository modules parsed")

        if not module_list_1:
            raise UserInputError(f"First repository is empty: \"{repos[0]}\"")

        if repos[1]:
            module_list_2 = get_modules_from_dir(repos[1])
            time_snap("Second repository modules parsed")

            if not module_list_2:
                raise UserInputError(
                    f"Second repository is empty: \"{repos[1]}\"")

            log.info("Running a two-repository analysis...")
            result = run_two_repos(module_list_1, module_list_2, algorithm)

        else:
            log.info("Running a single-repository analysis...")
            result = run_single_repo(module_list_1, algorithm)

        time_snap("Analysis completed")

        # Save detection result to a JSON file.

        json_filename = "clones_" + algorithm + "_" + repo_name(repos[0]) + "_"
        if repos[1]:
            json_filename += repo_name(repos[1]) + "_"
        json_filename += datetime.now().strftime("%Y-%m-%d_%H-%M-%S") + ".json"

        with open(json_filename, "w") as f:
            f.write(result.json())

    except UserInputError as ex:
        if ex.message:
            log.error(ex.message)

        sys.exit(ex.code)
def run_test(test_dir):
    """Run single-repo Chlorine on a fixture and compare with its stored result.

    Args:
        test_dir: Name of the fixture directory under
            ``chlorine_single_test_dir``. It must contain ``result.json``
            and a ``source`` directory.

    Raises:
        AssertionError: If the JSON-decoded output differs from the
            expected result.
    """
    test_path = path_join(chlorine_single_test_dir, test_dir)

    # Use a context manager so the fixture file is closed even when
    # JSON decoding fails (the handle used to be left open).
    with open(path_join(test_path, "result.json")) as expected_file:
        expected = json.load(expected_file)

    modules = get_modules_from_dir(path_join(test_path, "source"))
    actual = json.loads(chlorine_single_repo(modules).json())

    assert expected == actual
Exemplo n.º 6
0
def test_chlorine_single_not_none():
    """Single-repo Chlorine run on the first test repository must report clones."""
    parsed_modules = get_modules_from_dir(test_repo1_dir)
    assert parsed_modules

    # Both the result object and its clone collection have to be truthy.
    detection = chlorine_single_repo(parsed_modules)
    assert detection
    assert detection.clones
def test_simple_1():
    """Check Oxygen output for the "simple_test_1" fixture.

    Loads the expected result from ``result.json`` in the fixture directory,
    runs ``oxygen`` on the modules parsed from ``source1``, and compares the
    JSON-decoded outputs for equality.
    """
    test_path = path_join(oxygen_test_dir, "simple_test_1")

    # Use a context manager so the fixture file is closed even when
    # JSON decoding fails (the handle used to be left open).
    with open(path_join(test_path, "result.json")) as expected_file:
        expected = json.load(expected_file)

    modules = get_modules_from_dir(path_join(test_path, "source1"))
    actual = json.loads(oxygen(modules).json())

    assert expected == actual
def test_oxygen_not_none():
    """Oxygen run on the first test repository must report clones."""
    parsed_modules = get_modules_from_dir(test_repo1_dir)
    assert parsed_modules

    # Both the result object and its clone collection have to be truthy.
    detection = oxygen(parsed_modules)
    assert detection
    assert detection.clones
Exemplo n.º 9
0
def main():
    """Run a two-repository analysis from the command line.

    Reads the repositories and the algorithm from the CLI arguments, parses
    both repositories into module lists, runs the analysis and writes the
    result as JSON to ``clones_<timestamp>.json`` in the current working
    directory.

    On ``UserInputError`` the error message (if any) is logged and the
    process exits with the error's code.
    """
    try:
        # Command line arguments give the repositories and the algorithm.
        repos, algorithm = handle_cli_args()
        time_snap("Cloned repositories")

        # Parse the functions of each repository; an empty repository is
        # reported as a user input error.
        log.info("Parsing methods in repositories...")
        first_modules = get_modules_from_dir(repos[0])
        if not first_modules:
            raise UserInputError(f'First repository is empty: "{repos[0]}"')
        time_snap("Parsed first repository")

        second_modules = get_modules_from_dir(repos[1])
        if not second_modules:
            raise UserInputError(f'Second repository is empty: "{repos[1]}"')
        time_snap("Parsed second repository")

        log.info("Beginning full analysis...")
        clones = run_two_repos(first_modules, second_modules, algorithm)
        time_snap("Analysis completed")

        # Make sure the output directory exists, then write the result.
        output_path = os.getcwd()
        timestamp = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
        output_filename = f"clones_{timestamp}.json"
        os.makedirs(output_path, exist_ok=True)

        destination = os.path.join(output_path, output_filename)
        with open(destination, "w") as output_file:
            output_file.write(clones.json())

    except UserInputError as ex:
        if ex.message:
            log.error(ex.message)

        sys.exit(ex.code)
Exemplo n.º 10
0
def test_get_modules():
    """Parsing the first test repository yields a non-empty list of node lists."""
    parsed = get_modules_from_dir(test_repo1_dir)

    assert isinstance(parsed, list)
    assert len(parsed) > 0

    # Every module is a list whose items are all TreeNode instances.
    for module_nodes in parsed:
        assert isinstance(module_nodes, list)

        for node in module_nodes:
            assert isinstance(node, TreeNode)
Exemplo n.º 11
0
def main():
    """Run the Iodine analysis on two repositories from the command line.

    Reads the repositories from ``sys.argv``, parses both into module lists
    and passes them to ``iodine``.

    On ``UserInputError`` the error message (if any) is logged and the
    process exits with the error's code.
    """
    try:
        # Command line arguments give the two repositories.
        repos = handle_args(sys.argv)
        time_snap("Cloned repositories")

        # ------- FOR TESTING PURPOSES ------------

        # Parse the functions of each repository; an empty repository is
        # reported as a user input error.
        log.info("Parsing methods in repositories...")
        first_modules = get_modules_from_dir(repos[0])
        if not first_modules:
            raise UserInputError(f'First repository is empty: "{repos[0]}"')
        time_snap("Parsed first repository")

        second_modules = get_modules_from_dir(repos[1])
        if not second_modules:
            raise UserInputError(f'Second repository is empty: "{repos[1]}"')
        time_snap("Parsed second repository")

        log.info("Beginning full analysis...")
        iodine(first_modules, second_modules)
        time_snap("Analysis completed")

    except UserInputError as ex:
        if ex.message:
            log.error(ex.message)

        sys.exit(ex.code)
Exemplo n.º 12
0
def analyze_repo(repo_info, repo_id, algorithm=OXYGEN):
    """Analyze the repo using the specified algorithm. Store results in db.

    Clones or pulls the repository, parses its modules, runs the
    single-repository analysis and stores the commit, clusters and origins
    in one transaction. Pattern extraction then runs in a second
    transaction.

    Args:
        repo_info: Repository descriptor; this function uses its
            ``clone_or_pull()``, ``dir`` and ``hash`` members.
        repo_id: Database id of the row in ``repos`` to update.
        algorithm: Algorithm passed on to ``run_single_repo``.

    Returns:
        None. Returns early when the clone fails (status is set to
        ``err_clone``) or when no modules are found (status is left
        unchanged).

    Raises:
        PG_Error: If the database connection cannot be opened. A
            ``PG_Error`` raised after that point is passed to
            ``handle_pg_error`` instead.
    """
    log.info(f"Analyzing repository: {repo_info}")

    # Open the connection before the try block. If this were inside it and
    # failed, the except/finally clauses would touch an unbound ``conn`` and
    # hide the real error behind an UnboundLocalError.
    conn = pg_conn(db_url)

    try:
        if repo_info.clone_or_pull():
            log.success(
                f"Repository has been successfully cloned: {repo_info}")

        else:
            log.warning(f"Unable to clone repository: {repo_info}")

            conn.run(
                """UPDATE repos SET status = (SELECT id FROM states WHERE name = 'err_clone') WHERE id = %s;""",
                repo_id)

            return

        modules = get_modules_from_dir(repo_info.dir)

        if not modules:
            log.warning("Repository contains no Python module")
            return

        result = run_single_repo(modules, algorithm)

        # Insert repository analysis into database all at once
        with conn.transaction():
            commit_id = conn.one(
                """INSERT INTO commits (repo_id, hash) VALUES (%s, %s) RETURNING id;""",
                repo_id, repo_info.hash)

            for c in result.clones:
                cluster_id = conn.one(
                    """INSERT INTO clusters (commit_id, "value", weight) VALUES (%s, %s, %s) RETURNING id;""",
                    commit_id, c.value, c.match_weight)

                # Each origin maps to the similarity value stored with it.
                for o, s in c.origins.items():
                    conn.run(
                        """INSERT INTO origins (cluster_id, file, line, col_offset, similarity) VALUES (%s, %s, %s, %s, %s);""",
                        cluster_id, o.file, o.line, o.col_offset, s)

            log.success(
                f"Repository has been successfully analyzed: {repo_info}")

            conn.run(
                """UPDATE repos SET status = (SELECT id FROM states WHERE name = 'done') WHERE id = %s;""",
                repo_id)

        # Once done with the regular analysis, run pattern extraction
        with conn.transaction():
            _extract_patterns(conn, commit_id, modules)

        log.success(f"Pattern extraction from was successful: {repo_info}")

    except PG_Error as ex:
        handle_pg_error(ex, conn, repo_id)

    finally:
        # Runs on every path, including the early returns above.
        conn.close()
 def setUp(self):
     """Parse both test repositories into ``self.modules1`` and ``self.modules2``."""
     fixtures = (
         ("modules1", test_repo1_dir),
         ("modules2", test_repo2_dir),
     )
     for attribute, repo_dir in fixtures:
         setattr(self, attribute, get_modules_from_dir(repo_dir))
 def setUp(self):
     """Parse the first test repository and store it as ``self.modules1``."""
     first_repo_modules = get_modules_from_dir(test_repo1_dir)
     self.modules1 = first_repo_modules