示例#1
0
    def checkout_refactored_commit(self, repo_cfg: dict, call_back: callable):
        """Check out every commit listed in the repo's refactoring report and
        invoke *call_back* on each one.

        Supports resuming: commits that already have a smell-report CSV
        (named ``<id>_<type>_<commit>.csv``) are skipped.

        :param repo_cfg: repository configuration; must contain ``'name'``.
            Mutated in place: ``repo_cfg['commit']`` is set on each iteration.
        :param call_back: callable invoked with ``repo_cfg`` after each
            successful checkout.
        """
        gr = GitRepository(path=self.cfg['paths']['repo'] + repo_cfg['name'])
        try:
            # Best-effort cleanup of any local changes left by a prior run;
            # a failed reset must not abort the whole traversal.
            gr.reset()
        except Exception:
            pass

        df = pd.read_csv(self.cfg['paths']['commit_report'] +
                         repo_cfg['name'] + "_refactored.csv",
                         header=0)
        commits = df['commit'].tolist()
        previous = df['previous'].tolist()

        # Resume support: collect the commits that already produced output.
        finished_commits = []
        for file in os.listdir(self.cfg['paths']['smell_report'] +
                               repo_cfg['name']):
            if re.match(r'\d+_\w+_\w+\.csv', file):
                _file_id, _cs_type, commit = file.split('_')
                finished_commits.append(commit.replace('.csv', ''))

        # NaN entries come from empty 'previous' cells in the CSV.
        unique_commits = set(commits + previous) - {np.nan}
        unique_commits -= set(finished_commits)

        total = len(unique_commits)
        for count, commit in enumerate(unique_commits, start=1):
            print(f"{commit}\t\t\t{repo_cfg['name']}\t{count}/{total}")
            gr.checkout(commit)
            print('\t checkout -done!')
            repo_cfg['commit'] = commit
            call_back(repo_cfg)
示例#2
0
def main():
    """Clone each repository recorded in the source database, check out the
    recorded commit, and write project/model commit information into the
    destination database. The clone directory is removed after every
    repository, whether processing succeeded or not.
    """
    start = time.time()
    source_database = ""
    dst_database = ""
    path = "workdir"

    # Repositories known to break processing; skipped unconditionally.
    # (The original condition listed alesgraz/kinect2-SDK-for-Simulink twice.)
    skip_urls = {
        "https://github.com/alesgraz/kinect2-SDK-for-Simulink",
        "https://github.com/OpenCadd/Lego_nxt_car",
        "https://github.com/StefanMack/ProjSensSys",
        "https://github.com/chiloanel/UWMatlab",
    }

    dest_project_database_controller = Project_commits_info_Controller(
        dst_database)
    dest_model_database_controller = Model_commits_info_Controller(
        dst_database)
    project_verbatim = Project_commits_verbatim_Controller(dst_database)
    model_verbatim = Model_commits_verbatim_Controller(dst_database)

    # create a database connection to the source and destination databases
    conn = create_connection(source_database)
    dst_conn = create_connection(dst_database)
    with dst_conn:
        processed_id, processed_mdl_name = get_id_name(dst_conn)
    with conn:
        id_urls = get_repo_id_urls(conn)
        for repo_id, url, model_files, commit_hash in id_urls:
            if not os.path.exists(path):
                os.mkdir(path)
            if url in skip_urls:
                continue
            try:
                if repo_id not in processed_id:
                    os.system("git clone " + url + " " + path)  # Cloning
                    gr = GitRepository(path)
                    gr.checkout(commit_hash)
                    # Downstream helpers read from the local clone.
                    url = path
                    project_lifetime = write_project_commit_info(
                        url, repo_id, commit_hash,
                        dest_project_database_controller, project_verbatim)
                    write_model_commit_info(model_files, url, repo_id,
                                            commit_hash,
                                            dest_model_database_controller,
                                            project_lifetime, model_verbatim)
                else:
                    logging.info(
                        "Skipping . ALready Processed {}".format(repo_id))
            except Exception as e:
                logging.error(e)
                continue
            finally:
                # Remove the clone so the next iteration starts clean.
                shutil.rmtree(path)
    end = time.time()
    logging.info("IT took {} seconds".format(end - start))
示例#3
0
    def checkout_all_commit(self, repo_cfg: dict):
        """Walk every commit of the repository in chronological order and run
        each configured detector on it.

        :param repo_cfg: repository configuration; must contain ``'name'``
            and ``'commit_file'``. Mutated in place: ``repo_cfg['commit']``
            is set on each iteration.
        """
        repo = GitRepository(path=self.cfg['paths']['repo'] +
                             repo_cfg['name'])

        commit_df = pd.read_csv(self.cfg['paths']['commit_report'] +
                                repo_cfg['commit_file'],
                                header=0)
        # Oldest commit first, so detectors see the history in order.
        commit_df.sort_values(ascending=True, inplace=True,
                              by=['author_date'])

        for sha in commit_df['hash'].tolist():
            repo.checkout(sha)
            repo_cfg['commit'] = sha
            for detector in self.detectors:
                detector.run_on(repo_cfg)
示例#4
0
def main():
    """Build a per-commit index of bug-prone files for a git repository
    (via pydriller's SZZ implementation) and, for each major release, copy
    its source files into ``bug`` / ``not_bug`` directories.

    Usage: ``python script.py <repo_path>``. Reads the release list from
    ``maj_versions/<repo>.hash`` (CSV lines ``tag,hash``) and caches the
    bug index in ``data3/<repo>.pickle``.
    """
    repo_path = sys.argv[1]
    repo_branch = 'master'
    repo_name = os.path.basename(os.path.normpath(repo_path))

    commits = list(RepositoryMining(
        repo_path, only_in_branch=repo_branch).traverse_commits())

    gitRepo = GitRepository(repo_path)

    # Only consider releases at least three years from either end of the
    # project history.
    start_date = commits[0].committer_date + relativedelta(years=3)
    last_date = commits[-1].committer_date - relativedelta(years=3)

    bug_tracker = defaultdict(list)
    bug_tracker_pickle = "data3/{}.pickle".format(repo_name)

    # First index the buggy files (cached as a pickle between runs)
    if os.path.exists(bug_tracker_pickle):
        with open(bug_tracker_pickle, 'rb') as handle:
            bug_tracker = pickle.load(handle)
    else:
        for commit_index, commit in enumerate(commits):
            if not is_bugfix_commit(commit.msg):
                continue

            try:
                for m in commit.modifications:
                    if not valid_source_file(m.filename):
                        continue

                    # SZZ: commits that last modified the fixed lines.
                    bug_commit = gitRepo.get_commits_last_modified_lines(
                        commit, m)

                    # Earliest bug-introducing commit before the fix. The
                    # sentinel commit_index keeps the slice below empty when
                    # no introducing commit is found.
                    bug_start_index = commit_index
                    for _file in bug_commit:
                        for i, _commit in enumerate(commits[:commit_index]):
                            if (_commit.hash in bug_commit[_file]
                                    and i < bug_start_index):
                                bug_start_index = i

                    # Every commit from the bug's introduction up to (but
                    # not including) the fix sees this file as buggy.
                    for _commit in commits[bug_start_index:commit_index]:
                        bug_tracker[_commit.hash].append(m.filename)
            except Exception as e:
                print("[***]", e)
                print(traceback.format_exc())
                print("Continuing for next commits")

            print(len(bug_tracker.keys()))
        with open(bug_tracker_pickle, 'wb') as handle:
            pickle.dump(bug_tracker, handle, protocol=pickle.HIGHEST_PROTOCOL)

    # Load the (tag, hash) pairs of the major releases.
    with open('maj_versions/{}.hash'.format(repo_name)) as f:
        major_releases = []
        for line in f.read().splitlines():
            tag, release_hash = line.split(',')
            major_releases.append((tag, release_hash))

    # Hoisted out of the loop: O(1) membership test per commit instead of
    # rebuilding the hash list on every iteration.
    release_hashes = {h for _, h in major_releases}

    # Copy the files of each major release into bug / not_bug folders.
    for commit in commits:
        if commit.hash not in release_hashes:
            continue

        if commit.committer_date < start_date or commit.committer_date > last_date:
            continue

        # First tag recorded for this release commit (guaranteed to exist
        # by the membership test above).
        tag = next(t for t, h in major_releases if h == commit.hash)

        print("[*] Doing {}".format(tag))
        gitRepo.checkout(commit.hash)

        base_dir_not_bug = "data3/{}/{}/not_bug".format(repo_name, tag)
        base_dir_bug = "data3/{}/{}/bug".format(repo_name, tag)
        if not os.path.exists(base_dir_bug):
            os.makedirs(base_dir_bug)
        if not os.path.exists(base_dir_not_bug):
            os.makedirs(base_dir_not_bug)

        for _file in gitRepo.files():
            if not valid_source_file(_file):
                continue

            filename = os.path.basename(os.path.normpath(_file))
            if (commit.hash in bug_tracker
                    and filename in bug_tracker[commit.hash]):
                file_path_to_write = os.path.join(base_dir_bug, filename)
            else:
                file_path_to_write = os.path.join(base_dir_not_bug, filename)

            shutil.copyfile(_file, file_path_to_write)

    print("All Done!")
示例#5
0
class RepositoryProcessor:
    """Extracts before/after file pairs for modified Java files from a git
    repository and records them under ``filepairs/<repo>/``."""

    def __init__(self, repository: str, owner: str):
        """
        :param repository: path to the local clone of the repository.
        :param owner: repository owner, used to tag the emitted pairs.
        """
        self.owner = owner
        self.repository = os.path.split(repository)[-1]
        self.repo = GitRepository(repository)
        self.mining = RepositoryMining(repository)
        self.pairs = []
        random.seed(42)  # deterministic sampling for run_random()

    def run(self):
        """Process every non-merge commit and write the pair index file."""
        self.get_all_filepairs()
        with open(os.path.join('filepairs', self.repository, 'pairs.txt'),
                  'w') as f:
            f.write('\n'.join(
                map(lambda x: f'{x[0]} {x[1]} {x[2]}', self.pairs)))
            # Bug fix: this trailing-newline write used to sit outside the
            # `with` block, raising "I/O operation on closed file".
            f.write('\n')

    def get_all_filepairs(self, file_filter=java_file_filter):
        """Collect file pairs from all non-merge commits that modify a file
        accepted by *file_filter*."""
        commits = list(
            filter(lambda x: not x.merge, self.mining.traverse_commits()))
        for commit in commits:
            for modification in commit.modifications:
                if modification.change_type == ModificationType.MODIFY:
                    if file_filter(modification.filename):
                        self.get_file_pair(commit, modification)

    def get_file_pair(self, commit, modification: Modification):
        """Check out the commit's first parent and the commit itself, saving
        the before/after versions of *modification* and recording the pair.

        NOTE(review): assumes the commit has a parent; a root commit would
        raise IndexError here — callers only pass non-merge MODIFY commits.
        """
        parent = commit.parents[0]

        repo = self.repo.project_name
        commit_hash = commit.hash
        filename = modification.filename

        path = os.path.join('filepairs', repo, commit_hash, filename)
        os.makedirs(path, exist_ok=True)

        self.repo.checkout(parent)
        before = os.path.join(self.repository, modification.old_path)
        before_saved = os.path.join(path,
                                    'before_' + commit_hash + '_' + filename)
        copyfile(before, before_saved)

        self.repo.checkout(commit_hash)
        after = os.path.join(self.repository, modification.new_path)
        after_saved = os.path.join(path,
                                   'after__' + commit_hash + '_' + filename)
        copyfile(after, after_saved)

        self.pairs.append(
            (before_saved, after_saved,
             commit_hash + '.' + self.owner + '.' + before.replace('/', '.')))

    def run_random(self, number):
        """Process *number* randomly chosen non-merge commits and write the
        pair index file."""
        self.get_random_filepairs(number)
        with open(os.path.join('filepairs', self.repository, 'pairs.txt'),
                  'w') as f:
            f.write('\n'.join(
                map(lambda x: f'{x[0]} {x[1]} {x[2]}', self.pairs)))
            f.write('\n')

    def get_random_filepairs(self, number, file_filter=java_file_filter):
        """Collect file pairs from *number* commits sampled (with
        replacement) from the non-merge commits."""
        commits = random.choices(list(
            filter(lambda x: not x.merge, self.mining.traverse_commits())),
                                 k=number)
        for idx, commit in enumerate(commits):
            print(f'Processing commit №{idx}: {commit.hash}')
            for modification in commit.modifications:
                if modification.change_type == ModificationType.MODIFY:
                    if file_filter(modification.filename):
                        self.get_file_pair(commit, modification)
示例#6
0
文件: views.py 项目: PqES/forkuptool
def simular_conflitos(request):
    """Django view: simulate merge conflicts between fixed pairs of vendor
    and client commits of a configured repository.

    GET renders the configuration-selection form; POST runs the merge
    simulation for the chosen configuration and renders the results.
    """
    # Fetch the tool-execution configuration options registered in the
    # database, newest first.
    configuracaoferramenta_choices = ConfiguracaoFerramenta.objects.all(
    ).order_by('-id')
    configuracaoferramenta_choices_to_choicefield = list()
    for configuracao in configuracaoferramenta_choices:
        configuracaoferramenta_choices_to_choicefield.append(
            [configuracao.pk, configuracao])

    # On GET, render a blank form.
    if request.method == 'GET':
        form = ExecutarFerramentaForm(
            configuracaoferramenta_choices_to_choicefield)
        title = 'Forkuptool - Módulo de análise de repositórios'
        subtitle = 'Selecione uma configuração para continuar'
        return render(request, 'simular_conflitos.html', locals())

    # On POST, process the submitted form data.
    elif request.method == 'POST':
        configuracaoferramenta_escolhida = None

        if 'configuracaoferramenta_escolhida' in request.POST:
            configuracaoferramenta_escolhida = request.POST[
                'configuracaoferramenta_escolhida']

        if configuracaoferramenta_escolhida:
            # Fetch the configuration for the submitted id.
            config = ConfiguracaoFerramenta.objects.get(
                pk=configuracaoferramenta_escolhida)
            gr = GitRepository(config.path_auxiliary_files)

            # TODO: make the commit selection configurable (currently the
            # commit pairs below are hard-coded).

            # commits_vendor = (
            # 	'fca529f', 'a639855', '168e36a', '48374cd', 'e0428a2',
            # 	'49275f2', '25ad58d', 'd7c1bdf', '75c8a35', '41f8ace',
            # 	'69e7a98', 'a8f4829', 'be98898', 'f7e815a', '03f178c',
            # 	'fc7af49', '74a8748', 'a58320e', '1d4fa85', '855e138',
            # 	'671aaa8', '06a967e', '048aa85', '9119a3f', 'a74d6ae',
            # 	'bb83f28', '8f5e756', '83610c5', '5c5fc7d', '5e74902',
            # 	'ce59547', '923f5ee', 'df8666d', 'c219a97', 'aefcd22',
            # 	'0be9ee9', 'bb3c479', 'e0557a0', 'f02b0c3', '8d5cdfa',
            # 	'e944c53',
            # 	'0a93957',)
            commits_vendor = ('48d1edb', )

            # commits_client = (
            # 	'a9cb50e', '085aa2a', '66165fd', '6eb4d4c', '353a877',
            # 	'b1bf9fe', '7891bb7', '70ddc11', '6a858e2', 'bbc8bea',
            # 	'df85d8e', 'b57eca7', '914f9b1', 'e8faf15', '3a1a355',
            # 	'da45411', '53464c7', '75f241c', '1e8e20b', '7c9583c',
            #     '7482e54', 'a2c2ae2', 'dc18528', '5b308d9', 'fc62c99',
            # 	'2977dbf', '90aa392', 'd2c2f36', 'e083730', '2edc4b3',
            # 	'3328138', '3595f20', 'd6b40e5', '1c82a62', '757c692',
            # 	'0230fb0', '3094585', '43a6ab3', '151f8a3', 'ec90a78',
            # 	'5a51d1a',
            # 	'287f6ed',)
            commits_client = ('d472976', )

            contador = 0
            conflitos_por_rodada = dict()
            for c in commits_vendor:
                total_trechos_conflitantes = 0
                total_linhas_conflitantes = 0
                conflitos = dict()

                # Create throw-away branches for this round: one at the
                # vendor commit, one at the client commit, and a merge
                # branch starting from the client side.
                # NOTE(review): the client checkout uses gr.checkout()
                # while the others use gr.git().checkout() — presumably
                # equivalent here; confirm against pydriller's API.
                gr.git().checkout('master')
                gr.git().checkout(c)
                branch_vendor = 't' + str(contador + 1) + 'vendor'
                gr.git().branch(branch_vendor)
                gr.git().checkout('master')
                gr.checkout(commits_client[contador])
                branch_client = 't' + str(contador + 1) + 'client'
                gr.git().branch(branch_client)
                branch_merge = 't' + str(contador + 1) + 'merge'
                gr.git().branch(branch_merge)
                gr.git().checkout(branch_merge)

                try:
                    # Attempt the merge; if it runs without raising, there
                    # was no conflict.
                    gr.git().merge(branch_vendor)
                except Exception as e:
                    # GitPython reports the conflicting files in the
                    # exception text; parse it line by line.
                    linhas_com_erro = str(e)
                    linhas_com_erro = linhas_com_erro.split('\n')
                    arquivos_com_conflito = identificar_arquivos_em_conflito(
                        linhas_com_erro)

                    for a in arquivos_com_conflito:
                        numero_trechos_conflitantes = 0
                        numero_linhas_conflitantes = 0
                        caminho_completo = gr.path.as_posix() + '/' + a
                        if not is_binary(caminho_completo):
                            # Count conflict hunks and conflicting lines via
                            # the '<<<<<<< HEAD' / '=======' markers.
                            numero_trechos_conflitantes = contar_ocorrencias_desta_linha_neste_arquivo(
                                '<<<<<<< HEAD', caminho_completo)
                            numero_linhas_conflitantes = contar_linhas_entre_esses_linhas_neste_arquivo(
                                '<<<<<<< HEAD', '=======', caminho_completo)
                        else:
                            # Binary files cannot be scanned for markers;
                            # count them as one hunk / one line.
                            numero_trechos_conflitantes = 1
                            numero_linhas_conflitantes = 1
                        total_trechos_conflitantes += numero_trechos_conflitantes
                        total_linhas_conflitantes += numero_linhas_conflitantes
                        conflitos[
                            caminho_completo] = numero_trechos_conflitantes
                    # Undo the failed merge and return to master.
                    gr.git().merge('--abort')
                    gr.git().checkout('master')

                print(('Processou par {}: {} - {}').format(
                    (contador + 1), c, commits_client[contador]))

                # Per-round result: (conflicts per file, total hunks,
                # total conflicting lines).
                conflitos_por_rodada[(contador +
                                      1)] = (conflitos,
                                             total_trechos_conflitantes,
                                             total_linhas_conflitantes)
                contador += 1

            print(conflitos_por_rodada)
            title = 'Forkuptool - Módulo de análise de repositórios'
            subtitle = 'Simulação de conflitos de mesclagem'
            return render(request, 'simular_conflitos_show.html', locals())

        else:
            messages.error(request, 'Necessário informar uma configuração')
            return render(request, 'index.html', {
                'title': 'Forkuptool',
                'subtitle': 'Bem-vindo',
            })
示例#7
0
import os
from pydriller import GitRepository

#
# Collects statistic about module file count.
# Will be replaced by SourceAnalyzer.py
#

# Open the repository clone and force pydriller to treat 2.4-develop as the
# main branch. NOTE(review): `_conf` is a private pydriller attribute — no
# public setter appears to exist; confirm against the pydriller version used.
repository = GitRepository('../magento2-git')
repository._conf.set_value("main_branch", '2.4-develop')

# Pin the working tree to the 2.3.5-p1 release tag.
commit235p1 = repository.get_commit_from_tag('2.3.5-p1')
repository.checkout(commit235p1.hash)

# Count the files directly inside the Cron module (top level only —
# os.walk's first yield is non-recursive).
_, _, cron_files = next(os.walk("../magento2-git/app/code/Magento/Cron"))
file_count = len(cron_files)

print(file_count)