示例#1
0
def search_deleted_files(repository_id, repository_name, master_branch):
    """Register parseable files that were deleted during the repository history.

    Runs ``git log --diff-filter=D --summary`` inside the repository directory
    and scans every output line with the configured
    ``git_deleted_log_file_regex``. Group 1 of that regex carries the commit
    hash and group 5 the path of a deleted file. For each deleted path that
    matches ``parseable_files_regex`` and has no ``file_versions`` row with the
    same ``older_version_path``/``commit_hash``, the file is inserted through
    ``insert_file`` and its history before the deleting commit is collected
    with ``execute_git_log_to_get_versions``.

    Args:
        repository_id: Identifier forwarded to ``insert_file``.
        repository_name: Name appended to the configured
            ``repository_directory`` to locate the checkout.
        master_branch: Not used by this function; kept so existing callers
            keep working.
    """
    repository_directory = DiretoryConfig.get_parameter('repository_directory') + repository_name
    git_deleted_log_file_regex = FileHandlerConfig.get_parameter('git_deleted_log_file_regex')
    file_regex = FileHandlerConfig.get_parameter('parseable_files_regex')

    # The command is constant, so an argument list avoids spawning a shell.
    process = subprocess.Popen(
        ["git", "log", "--diff-filter=D", "--summary"],
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        stdin=subprocess.PIPE,
        cwd=repository_directory,
    )
    # Undecodable bytes are dropped: non-ASCII characters are discarded below
    # anyway, so a stray byte should not abort the whole scan.
    git_log_output = process.communicate()[0].strip().decode("utf-8", "ignore").split('\n')

    commit_hash = ''

    connection = PSQLConnection.get_connection()
    try:
        cursor = connection.cursor()

        for git_log_output_line in git_log_output:
            # removes non ascii characters
            stripped_line = ''.join(c for c in git_log_output_line if 0 < ord(c) < 127)

            git_log_file_matcher = re.match(git_deleted_log_file_regex, stripped_line)
            if git_log_file_matcher is None:
                continue

            # A commit header line updates the hash that following paths belong to.
            if git_log_file_matcher.group(1):
                commit_hash = git_log_file_matcher.group(1)

            version_path = git_log_file_matcher.group(5)
            if not version_path:
                continue
            if re.match(file_regex, version_path) is None:
                continue

            cursor.execute("select count(*) from file_versions where older_version_path = %s and commit_hash = %s", (version_path, commit_hash))
            found_in_database = cursor.fetchone()[0]
            if found_in_database != 0:
                continue

            print(found_in_database, version_path, commit_hash)
            file_name = version_path.split('/')[-1]
            file_id = insert_file(repository_id, file_name, version_path, commit_hash)
            if file_id is not None:
                # "<hash>^" starts the log at the parent of the deleting commit,
                # i.e. the last revision in which the file still existed.
                execute_git_log_to_get_versions("git log "+commit_hash+"^ --follow --stat=350 --stat-graph-width=2 -- ", file_id, version_path, repository_directory)
    finally:
        # Release the connection even when a query or a nested call fails.
        connection.close()
示例#2
0
def process_parseable_files(repository_id, repository_name):
    """Insert every parseable file found in the repository checkout.

    Walks the directory formed by the configured ``repository_directory`` plus
    ``repository_name``. Each file whose name matches ``parseable_files_regex``
    is passed to ``insert_file`` together with its path relative to the
    repository root, and that relative path is printed.

    Args:
        repository_id: Identifier forwarded to ``insert_file``.
        repository_name: Name appended to the configured
            ``repository_directory`` to locate the checkout.
    """
    repository_path = DiretoryConfig.get_parameter('repository_directory') + repository_name
    file_regex = FileHandlerConfig.get_parameter('parseable_files_regex')

    for root, _dirs, file_names in os.walk(repository_path):
        for file_name in file_names:
            if re.match(file_regex, file_name) is None:
                continue
            # relpath strips only the leading repository prefix and copes with
            # a trailing slash on the configured directory; a plain
            # str.replace() would remove the prefix text wherever it occurs.
            relative_path = os.path.relpath(os.path.join(root, file_name), repository_path)
            # Use forward slashes so the stored path matches what git reports.
            relative_path = relative_path.replace(os.sep, '/')
            insert_file(repository_id, file_name, relative_path)
            print (relative_path)
示例#3
0
def extract_file_versions(repository_id, repository_name):
    """Collect the git history of every file stored for a repository.

    Reads ``id`` and ``file_path`` of all rows in ``files`` that belong to
    ``repository_id`` and calls ``execute_git_log_to_get_versions`` for each
    one with a ``git log --follow`` command run in the repository checkout.

    Args:
        repository_id: Repository whose ``files`` rows are processed.
        repository_name: Name appended to the configured
            ``repository_directory`` to locate the checkout.
    """
    repository_path = DiretoryConfig.get_parameter('repository_directory') + repository_name

    connection = PSQLConnection.get_connection()
    try:
        cursor = connection.cursor()
        cursor.execute('select id, file_path from files where repository_id = %s', (repository_id, ))
        files_results = cursor.fetchall()
    finally:
        # The rows are fully fetched, so the connection is released before the
        # (potentially long) per-file git runs, and also when the query fails.
        connection.close()

    for file_id, file_path in files_results:
        execute_git_log_to_get_versions("git log --follow --stat=350 --stat-graph-width=2 -- ", file_id, file_path, repository_path)
示例#4
0
def _split_renamed_path(stat_path):
    """Split a rename entry from ``git log --stat`` into ``(older, newer)`` paths.

    Two shapes are handled: ``old => new`` and ``prefix/{old => new}/suffix``.
    In the braced shape either side may be empty (``{ => new}`` or
    ``{old => }``); the slash following the braces is then removed together
    with the empty side so no ``//`` is left in the resulting path.
    """
    brace_start = stat_path.find('{')
    brace_end = stat_path.find('}')
    braces = stat_path[brace_start:brace_end + 1] if brace_start != -1 else ''

    if '=>' not in braces:
        # Plain "old => new", or braces that do not wrap the arrow.
        older, newer = stat_path.split('=>')[:2]
        return older.strip(), newer.strip()

    raw_old, raw_new = braces.split('=>')[:2]
    old_part = raw_old.strip().replace('{', '').replace('}', '')
    new_part = raw_new.strip().replace('{', '').replace('}', '')

    if raw_old.replace('{', '') == ' ':
        # "{ => new}/": the older path has no directory at this position.
        return stat_path.replace(braces + "/", old_part), stat_path.replace(braces, new_part)
    if raw_new.replace('}', '') == ' ':
        # "{old => }/": the newer path has no directory at this position.
        return stat_path.replace(braces, old_part), stat_path.replace(braces + "/", new_part)
    return stat_path.replace(braces, old_part), stat_path.replace(braces, new_part)


def _insert_file_version(connection, cursor, file_id, commit_hash, author_name, author_email, author_date, version_path, older_version_path):
    """Insert one ``file_versions`` row and commit it immediately."""
    cursor.execute("insert into file_versions (file_id, commit_hash, author_name, author_email, author_date, version_path, older_version_path) values ( %s, %s, %s, %s, to_timestamp(%s, 'Dy Mon DD HH24:MI:SS YYYY +-####'), %s, %s)", (file_id, commit_hash, author_name, author_email, author_date, version_path, older_version_path))
    connection.commit()


def execute_git_log_to_get_versions(git_log_command, file_id, file_path, repository_path):
    """Run a git log command for one file and store each commit as a version.

    The output is scanned line by line with the configured
    ``git_log_file_regex``: group 1 is the commit hash, groups 2-4 the author
    name, e-mail and date, and group 5 the path shown in the stat section.
    A row is written to ``file_versions`` each time the next commit header is
    reached, and once more after the last line for the final commit.

    Args:
        git_log_command: Command line up to and including the ``--``
            separator, e.g. ``"git log --follow --stat=350 -- "``.
        file_id: Value stored in ``file_versions.file_id``.
        file_path: Path appended to the command as its last argument.
        repository_path: Working directory in which git is executed.
    """
    # Function-scope import so this block does not depend on the file's import list.
    import shlex

    git_log_file_regex = FileHandlerConfig.get_parameter('git_log_file_regex')

    # Pass the path as its own argv entry instead of concatenating it into a
    # shell string, so paths containing spaces or shell metacharacters work.
    command = shlex.split(git_log_command) + [file_path]
    process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE, cwd=repository_path)
    # Undecodable bytes are dropped: non-ASCII characters are discarded below anyway.
    git_log_output = process.communicate()[0].strip().decode("utf-8", "ignore").split('\n')

    commit_hash = ''
    author_name = ''
    author_email = ''
    author_date = ''
    version_path = ''
    older_version_path = ''

    connection = PSQLConnection.get_connection()
    try:
        cursor = connection.cursor()

        for git_log_output_line in git_log_output:
            # removes non ascii characters
            stripped_line = ''.join(c for c in git_log_output_line if 0 < ord(c) < 127)

            git_log_file_matcher = re.match(git_log_file_regex, stripped_line)
            if git_log_file_matcher is None:
                continue

            if git_log_file_matcher.group(1):
                # A new commit header: flush the commit collected so far.
                if commit_hash != '':
                    _insert_file_version(connection, cursor, file_id, commit_hash, author_name, author_email, author_date, version_path, older_version_path)
                commit_hash = git_log_file_matcher.group(1)

            if git_log_file_matcher.group(2):
                author_name = git_log_file_matcher.group(2)
            if git_log_file_matcher.group(3):
                author_email = git_log_file_matcher.group(3)
            if git_log_file_matcher.group(4):
                author_date = git_log_file_matcher.group(4)
            if git_log_file_matcher.group(5):
                version_path = git_log_file_matcher.group(5).strip()
                older_version_path = ''
                if '=>' in version_path:
                    print (version_path)
                    older_version_path, version_path = _split_renamed_path(version_path)

        # The last commit has no following header to trigger its insert. Skip
        # it when git produced no commit at all, otherwise an all-empty row
        # would be written.
        if commit_hash != '':
            _insert_file_version(connection, cursor, file_id, commit_hash, author_name, author_email, author_date, version_path, older_version_path)
    finally:
        # Release the connection even when parsing or an insert fails.
        connection.close()