Exemplo n.º 1
0
def _shell_quote(value):
    """Quote *value* so a POSIX shell passes it through as one literal word."""
    # Inside '...' everything is literal except the single quote itself, so
    # each embedded quote closes the string, is emitted escaped, and reopens it.
    return "'" + value.replace("'", "'\"'\"'") + "'"


def _remote_lines(ssh, command):
    """Run *command* through ``ssh.exec_command`` and return stdout as lines."""
    _, stdout, _ = ssh.exec_command(command)
    return stdout.read().splitlines()


def _pair_commits_with_stats(lines, sha1_pattern):
    """Pair every commit header line with the shortstat line that follows it.

    A header is a line matched by *sha1_pattern*. Blank lines are ignored.
    A header that is followed by another header (a commit with no changed
    lines, e.g. a merge or a tag) or by nothing at all is discarded.

    Returns a list of ``(header, stat)`` tuples of stripped strings.
    """
    pairs = []
    pending_header = None
    for raw_line in lines:
        line = raw_line.strip()
        if not line:
            continue
        if sha1_pattern.match(raw_line):
            # A still-pending header had no statistics; it is replaced here.
            pending_header = line
        elif pending_header is not None:
            pairs.append((pending_header, line))
            pending_header = None
    return pairs


def _parse_shortstat(stat):
    """Extract ``(inserted, deleted)`` from a ``git log --shortstat`` line.

    Git leaves out the insertions or the deletions part when its count is
    zero, so a missing part is reported as ``'0'``. The counts are returned
    as digit strings. Returns ``None`` if *stat* is not a shortstat line.
    """
    if not re.search(r"\d+ files? changed", stat):
        return None
    inserted = re.search(r"(\d+) insertions?\(\+\)", stat)
    deleted = re.search(r"(\d+) deletions?\(-\)", stat)
    return (inserted.group(1) if inserted else '0',
            deleted.group(1) if deleted else '0')


def fetch_logs(ssh, conn, cur, teams, users):
    """Fetch and save the logs for Git repositories by SSHing into Alioth.

    For every team, each ``*.git`` directory below ``/git/<team>`` is
    inspected. Commits touching the ``debian`` path are counted per author;
    if no author has touched that path, all commits of the repository are
    counted instead. Every commit not yet recorded is inserted into the
    ``commitstat`` table and remembered through ``checkrevision``.

    Parameters:
        ssh: object whose ``exec_command(cmd)`` returns a
            ``(stdin, stdout, stderr)`` triple (presumably a paramiko
            ``SSHClient`` -- confirm against the caller).
        conn: database connection; ``commit()`` and ``rollback()`` are used.
        cur: database cursor; ``execute()`` is used.
        teams: iterable of team names.
        users: not used by this function; kept for interface compatibility.
    """
    today_date = datetime.date.today()
    # A regex pattern to match SHA-1 hashes.
    sha1_pattern = re.compile("[a-f0-9]{40}")

    for team in teams:
        # Get the already parsed revisions (stored as 6-character prefixes).
        all_revisions = checkrevision.read_configuration(team, 'git')
        known_revisions = all_revisions[team] if team in all_revisions else ()

        # Get the directory listing.
        logging.info('Parsing repository: %s', team)
        cwd = '/git/{0}'.format(team)
        listing = _remote_lines(ssh, "ls {0}".format(_shell_quote(cwd)))
        # Get only the git directories.
        git_dirs = [entry for entry in listing if entry.endswith('.git')]

        for each_dir in git_dirs:
            logging.info('\tPackage: %s', each_dir)
            # The package name is the directory name without the '.git' suffix.
            package = each_dir[:-4]
            git_dir_arg = _shell_quote('{0}/{1}'.format(cwd, each_dir))

            # First fetch the authors who have committed to the Debian branch.
            # This is used to filter upstream contributors who are contributing
            # but are not part of the team and hence not part of the metrics.
            author_cmd = "git --git-dir={0} log --pretty=format:'%an'".format(
                git_dir_arg)
            path_filter = ' -- debian'
            authors = set(_remote_lines(ssh, author_cmd + path_filter))

            # But for teams who are not contributing to Debian development,
            # there is no Debian branch. So fetch all the statistics for them.
            if not authors:
                logging.warning('No Debian branch found')
                path_filter = ''
                authors = set(_remote_lines(ssh, author_cmd))
                # If there are still no authors, go on to the next repository.
                if not authors:
                    continue

            # Fetch the commit details for each author.
            for author in authors:
                if author == 'unknown':
                    continue

                # Author names come from the repository history, so they are
                # quoted before being handed to the remote shell.
                stat_cmd = (
                    "git --git-dir={0} log --no-merges --author={1} "
                    "--pretty=format:'%H,%ai' --shortstat{2}".format(
                        git_dir_arg, _shell_quote(author + ' <'), path_filter))
                stat_lines = _remote_lines(ssh, stat_cmd)

                for header, stat in _pair_commits_with_stats(stat_lines,
                                                             sha1_pattern):
                    # If the revision has already been parsed.
                    if header[:6] in known_revisions:
                        continue

                    try:
                        commit_hash, date_raw = header.split(',')
                    except ValueError as detail:
                        logging.error(detail)
                        continue

                    counts = _parse_shortstat(stat)
                    if counts is None:
                        logging.error('Unexpected shortstat line: %s', stat)
                        continue
                    added, deleted = counts

                    # There are some invalid dates, just skip those commits.
                    try:
                        date = date_raw.split()[0]
                    except IndexError as detail:
                        logging.warning('Invalid date: %s', date_raw)
                        logging.error(detail)
                        continue

                    try:
                        cur.execute(
                            """INSERT INTO commitstat(commit_id, project, package, vcs, name,
                            commit_date, today_date, lines_inserted, lines_deleted)
                                VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s);""",
                            (commit_hash, team, package, 'git', author, date,
                             today_date, added, deleted)
                        )
                        conn.commit()
                    except psycopg2.DataError as detail:
                        conn.rollback()
                        logging.error(detail)
                        continue
                    except psycopg2.IntegrityError:
                        conn.rollback()
                        logging.warning("Hash '%s' in '%s' package duplicated",
                                        commit_hash, package)
                        continue

                    checkrevision.save_configuration(team, commit_hash[:6], 'git')

    logging.info('Git logs saved...')
Exemplo n.º 2
0
def _count_changed_lines(diff_output):
    """Count added and removed lines in the output of ``svn diff``.

    Lines starting with ``+++`` or ``---`` are treated as file headers and
    are not counted. Byte literals are used because the diff is read from a
    pipe; on Python 2 they are ordinary ``str`` literals.

    Returns an ``(inserted, deleted)`` tuple of integers.
    """
    inserted = 0
    deleted = 0
    for line in diff_output.splitlines():
        if line.startswith((b'+++', b'---')):
            continue
        if line.startswith(b'+'):
            inserted += 1
        elif line.startswith(b'-'):
            deleted += 1
    return inserted, deleted


def parse_revision():
    """Fetch the revisions for the called team and write them to a file.

    The team name is taken from ``sys.argv[1]``. The Subversion log of
    ``file:///svn/<team>/`` is read, authors listed in ``IGNORE`` are
    dropped, and one record per not-yet-recorded revision is written to
    ``PARSE_INFO_FILE``. When ``SKIP_LINES`` is false, each revision's diff
    is fetched as well to count inserted and deleted lines. Every written
    revision is remembered through ``checkrevision.save_configuration``.

    The function always ends by calling ``sys.exit()``.
    """
    today_date = datetime.date.today()
    team = sys.argv[1]
    repo_url = 'file:///svn/{0}/'.format(team)
    vcs = 'svn'
    project = team
    package = team

    # The context manager closes the output file even when svn or the XML
    # parsing fails part-way through.
    with open(PARSE_INFO_FILE, 'w') as parse_f:
        # Arguments are passed as a list so the team name is never re-split.
        log_cmd = ['svn', 'log', '--xml', repo_url]
        output = subprocess.Popen(log_cmd,
                                  stdout=subprocess.PIPE).communicate()[0]
        output_xml = ET.fromstring(output)

        # Get the list of committers and their revisions from the repository.
        author_info = collections.defaultdict(list)
        revision_date = {}
        for info in output_xml.findall('.//logentry'):
            # In some cases, the author tag is missing.
            try:
                author, date, msg = [element.text for element in info]
            except ValueError:
                continue
            revision = info.get('revision')
            author_info[author].append(revision)
            revision_date[revision] = date.split('T')[0]

        # Some authors are a result of missing authors or merges, so ignore
        # them.
        for ignore_author in IGNORE:
            author_info.pop(ignore_author, None)

        for author, revisions in author_info.items():
            # Revisions that were already saved are not processed again.
            done_revisions = checkrevision.read_configuration(team, 'svn')

            for change in revisions:
                if team in done_revisions and change in done_revisions[team]:
                    continue

                if SKIP_LINES:
                    record = FORMAT.format(change, project, package, vcs,
                                           author, revision_date[change],
                                           today_date)
                else:
                    # Fetch the diff of this revision to count changed lines.
                    diff_cmd = ['svn', 'diff', '-c', '{0}'.format(change),
                                repo_url]
                    diff_output = subprocess.Popen(
                        diff_cmd, stdout=subprocess.PIPE).communicate()[0]
                    inserted, deleted = _count_changed_lines(diff_output)
                    record = FORMAT_ALL.format(change, project, package, vcs,
                                               author, revision_date[change],
                                               today_date, inserted, deleted)

                parse_f.write(record)
                parse_f.write('\n')
                # Flush per record so finished work survives an interruption.
                parse_f.flush()
                checkrevision.save_configuration(project, change, 'svn')

    sys.exit()