def fetch_logs(ssh, conn, cur, teams, users):
    """Fetch and save the logs for Git repositories by SSHing into Alioth."""
    today_date = datetime.date.today()
    # A regex pattern to match SHA-1 hashes.
    pattern = re.compile("[a-f0-9]{40}")
    for team in teams:
        # Get the already parsed revisions.
        all_revisions = checkrevision.read_configuration(team, 'git')
        # Get the directory listing.
        logging.info('Parsing repository: %s' % team)
        cwd = '/git/{0}'.format(team)
        stdin, stdout, stderr = ssh.exec_command("ls {0}".format(cwd))
        output = stdout.read()
        # Keep only the Git directories.
        git_dir = [dir for dir in output.splitlines() if dir.endswith('.git')]
        for each_dir in git_dir:
            no_debian = False
            logging.info('\tPackage: %s' % each_dir)
            cwd_process = cwd + '/{0}'.format(each_dir)
            # First fetch the authors who have committed to the Debian branch.
            # This is used to filter out upstream contributors who are
            # contributing but are not part of the team and hence not part
            # of the metrics.
            author_cmd = ("git --git-dir={0} log "
                          "--pretty=format:'%an' -- debian".format(cwd_process))
            stdin, stdout, stderr = ssh.exec_command(author_cmd)
            authors_lst = stdout.read().splitlines()
            # Uniquify the authors.
            authors = set(authors_lst)
            # But teams that are not contributing to Debian development have
            # no Debian branch, so fetch all the statistics for them instead.
            if not authors:
                logging.warning('No Debian branch found')
                author_cmd = ("git --git-dir={0} log "
                              "--pretty=format:'%an'".format(cwd_process))
                stdin, stdout, stderr = ssh.exec_command(author_cmd)
                authors_lst = stdout.read().splitlines()
                authors = set(authors_lst)
                no_debian = True
            # If there are still no authors, go on to the next repository.
            if not authors:
                continue
            # Fetch the commit details for each author.
            for author in authors:
                if author == 'unknown':
                    continue
                if no_debian:
                    stat_cmd = ("git --git-dir={0} log --no-merges "
                                "--author='{1} <' --pretty=format:'%H,%ai' "
                                "--shortstat".format(cwd_process, author))
                else:
                    stat_cmd = ("git --git-dir={0} log --no-merges "
                                "--author='{1} <' --pretty=format:'%H,%ai' "
                                "--shortstat -- debian".format(cwd_process, author))
                stdin, stdout, stderr = ssh.exec_command(stat_cmd)
                author_read = stdout.read().splitlines()
                # Some log entries have no lines changed because they are a
                # merge or a tag commit; filter those out by dropping a hash
                # line that is not followed by a shortstat line.
                found = True
                for element in author_read[:]:
                    if pattern.match(element):
                        if not found:
                            element_index = author_read.index(element)
                            author_read.pop(element_index - 1)
                        found = False
                    else:
                        found = True
                author_raw = [element.strip() for element in author_read if element]
                # Pair each '%H,%ai' line with its shortstat line.
                author_info = []
                for a, b in zip(author_raw[::2], author_raw[1::2]):
                    author_info.append(a + ',' + b)
                for change in author_info:
                    # Skip revisions that have already been parsed.
                    if team in all_revisions:
                        if change[:6] in all_revisions[team]:
                            continue
                    try:
                        commit_hash, date_raw, changed, added, deleted = change.split(',')
                    except ValueError as detail:
                        logging.error(detail)
                        continue
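                    # Illustrative note (not in the original source): at this point each
                    # `change` entry is the '%H,%ai' line joined to its shortstat line,
                    # e.g. roughly
                    #   "<40-char sha>,2011-06-20 18:35:07 +0530,"
                    #   " 3 files changed, 10 insertions(+), 5 deletions(-)"
                    # so split(',') yields five fields; commits whose shortstat lacks
                    # either insertions or deletions land in the ValueError branch above
                    # and are skipped.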
                    # There are some invalid dates; just skip those commits.
                    try:
                        date = date_raw.split()[0]
                    except IndexError as detail:
                        logging.warning('Invalid date: %s' % date_raw)
                        logging.error(detail)
                        continue
                    added = added.strip().split()[0]
                    deleted = deleted.strip().split()[0]
                    if each_dir.endswith('.git'):
                        each_dir = each_dir[:-4]
                    try:
                        cur.execute(
                            """INSERT INTO commitstat(commit_id, project, package, vcs,
                                   name, commit_date, today_date, lines_inserted,
                                   lines_deleted)
                               VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s);""",
                            (commit_hash, team, each_dir, 'git', author, date,
                             today_date, added, deleted)
                        )
                        conn.commit()
                    except psycopg2.DataError as detail:
                        conn.rollback()
                        logging.error(detail)
                        continue
                    except psycopg2.IntegrityError as detail:
                        conn.rollback()
                        logging.warning("Hash '%s' in '%s' package duplicated"
                                        % (commit_hash, each_dir))
                        continue
                    checkrevision.save_configuration(team, commit_hash[:6], 'git')
    logging.info('Git logs saved...')
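

# Illustrative usage sketch, not part of the original module: fetch_logs() expects an
# SSH client whose exec_command() returns (stdin, stdout, stderr) -- paramiko behaves
# this way -- plus an open psycopg2 connection and cursor.  The host name, user,
# database name and team list below are placeholders, not values from this project.
def _example_fetch_logs():
    import paramiko
    import psycopg2

    ssh = paramiko.SSHClient()
    ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())
    ssh.connect('alioth.debian.org', username='example-user')  # hypothetical host/user

    conn = psycopg2.connect(database='example_metrics')  # hypothetical database
    cur = conn.cursor()

    # `users` is accepted but unused by the code shown above, so pass an empty list.
    fetch_logs(ssh, conn, cur, teams=['example-team'], users=[])

    cur.close()
    conn.close()
    ssh.close()

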
def parse_revision():
    """Fetch the revisions for the called teams."""
    revisions = collections.defaultdict(list)
    today_date = datetime.date.today()
    team = sys.argv[1]
    parse_f = open(PARSE_INFO_FILE, 'w')
    cmd_raw = 'svn log --xml file:///svn/{0}/'.format(team)
    cmd = shlex.split(cmd_raw)
    output = subprocess.Popen(cmd, stdout=subprocess.PIPE).communicate()[0]
    output_xml = ET.fromstring(output)
    # Get the list of committers and their revisions from the repository.
    new_changes = []
    author_info = collections.defaultdict(list)
    revision_date = {}
    for info in output_xml.getiterator('logentry'):
        # In some cases, the author tag is missing.
        try:
            author, date, msg = [element.text for element in info.getchildren()]
        except ValueError:
            continue
        revision = info.get('revision')
        author_info[author].append(revision)
        revision_date[revision] = date.split('T')[0]
    # Some author entries are the result of missing authors or merges, so ignore them.
    for ignore_author in IGNORE:
        if ignore_author in author_info:
            del author_info[ignore_author]
    vcs = 'svn'
    total_authors = len(author_info)
    for committer, revision in author_info.iteritems():
        project = team
        package = team
        author = committer
        # Fetch the diff for each revision of an author. If the revision
        # has already been downloaded, it won't be downloaded again.
        done_revisions = checkrevision.read_configuration(team, 'svn')
        for change in revision:
            # Skip revisions that have already been parsed.
            if team in done_revisions:
                if change in done_revisions[team]:
                    continue
            if SKIP_LINES:
                parse_f.write(FORMAT.format(change, project, package, vcs, author,
                                            revision_date[change], today_date))
                parse_f.write('\n')
                parse_f.flush()
                checkrevision.save_configuration(project, change, 'svn')
            else:
                # Count inserted and deleted lines from the diff of this revision.
                inserted = 0
                deleted = 0
                cmd_raw = 'svn diff -c {0} file:///svn/{1}/'.format(change, team)
                cmd = shlex.split(cmd_raw)
                output = subprocess.Popen(cmd, stdout=subprocess.PIPE).communicate()[0]
                lines = [line for line in output.splitlines()
                         if line.startswith(('+', '-'))]
                for line in lines:
                    if not line.startswith(('+++', '---')):
                        if line.startswith('+'):
                            inserted += 1
                        else:
                            deleted += 1
                parse_f.write(FORMAT_ALL.format(change, project, package, vcs, author,
                                                revision_date[change], today_date,
                                                inserted, deleted))
                parse_f.write('\n')
                parse_f.flush()
                checkrevision.save_configuration(project, change, 'svn')
    parse_f.close()
    sys.exit()
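

# Illustrative sketch, not part of the original module: parse_revision() takes the team
# name from sys.argv[1], so the script would be invoked along the lines of
#   python <this-script> example-team
# The helper below only demonstrates the +/- counting that parse_revision() applies to
# `svn diff` output, using a made-up unified diff; header lines ('+++'/'---') are
# excluded, and every other '+'/'-' line counts as an insertion or a deletion.
def _example_count_diff_lines():
    sample_diff = (
        "--- foo.py\t(revision 41)\n"
        "+++ foo.py\t(revision 42)\n"
        "@@ -1,1 +1,2 @@\n"
        "-print 'old'\n"
        "+print 'new'\n"
        "+print 'extra'\n"
    )
    inserted = 0
    deleted = 0
    for line in sample_diff.splitlines():
        if line.startswith(('+', '-')) and not line.startswith(('+++', '---')):
            if line.startswith('+'):
                inserted += 1
            else:
                deleted += 1
    return inserted, deleted  # (2, 1) for the sample diff above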