def codeMetricsTable(nameLabel, dataFrame, sourceFilesDirectory, storageConnection):
    """Return the code-metrics table for revision *nameLabel*, caching it in storage.

    On a cache hit the previously stored table is returned unchanged. Otherwise the
    working copy is switched to the *nameLabel* revision, metrics are collected with
    the external CK tool (expected at ../ck relative to the CWD), pruned, persisted
    under '<nameLabel>_codeMetrics', and returned.

    Args:
        nameLabel: revision label; also the cache-table name prefix.
        dataFrame: unused here; kept for signature compatibility with the sibling
            codeMetricsTable variant that iterates dataFrame['entity'].
        sourceFilesDirectory: checkout directory the metrics are computed over.
        storageConnection: handle passed through to the storage module.

    Returns:
        The metrics table (as produced by collectMetrics/dropUnusedMetrics).
    """
    import os
    import storage
    import scmData as scm

    tableName = nameLabel + '_codeMetrics'
    if storage.tableExists(tableName, storageConnection):
        return storage.readTable(tableName, storageConnection)

    scm.gitlog.switchToRevision(sourceFilesDirectory, nameLabel)
    # CK binary directory is assumed to live next to the CWD -- TODO confirm layout.
    metricsData = collectMetrics(os.path.join(os.getcwd(), "..", "ck"),
                                 sourceFilesDirectory)
    metricsData = dropUnusedMetrics(metricsData)
    storage.writeTable(tableName, storageConnection, metricsData)
    return metricsData
def makeCommitDateMapping(sourceDirectory, filesToInspect, storageConnection, branchname='master'):
    """Build (or load from cache) a sha -> commit-date table for *filesToInspect*.

    Switches the checkout to *branchname*, runs `git log` restricted to the given
    paths, and parses the 'sha,date' output into a DataFrame that is persisted
    under '<branchname>_commitDates'.

    Args:
        sourceDirectory: git working-copy directory to log in.
        filesToInspect: path spec appended after `--` in the git command.
        storageConnection: handle passed through to the storage module.
        branchname: branch/revision to log; defaults to 'master'.

    Returns:
        A DataFrame with str columns 'sha' and 'date', or the sentinel string
        "no commit date mapping available" if the git command fails (callers
        must be prepared for either -- preserved from the original contract).
    """
    import storage

    tableName = branchname + '_commitDates'
    if storage.tableExists(tableName, storageConnection):
        return storage.readTable(tableName, storageConnection)

    switchToRevision(sourceDirectory, branchname)
    # NOTE(review): shell=True with interpolated paths is injection-prone if
    # sourceDirectory/filesToInspect can be untrusted; a list-form
    # subprocess.run(..., cwd=sourceDirectory) would be safer.
    command = "cd %s; git log --pretty=format:'%%h,%%ad' --date=short -- %s" % (
        sourceDirectory, filesToInspect)
    try:
        # check_output returns bytes on Python 3; StringIO only accepts str,
        # so decode first (the original passed bytes and raised TypeError).
        output = subprocess.check_output(command, shell=True).decode('utf-8')
        dates = pd.read_csv(StringIO(output),
                            names=['sha', 'date'],
                            dtype={'sha': str, 'date': str})
        storage.writeTable(tableName, storageConnection, dates)
    except subprocess.CalledProcessError:
        dates = "no commit date mapping available"
    return dates
def indentMetricsTable(nameLabel, dataFrame, sourceFilesDirectory, storageConnection, tabsize=4):
    """Compute (or load from cache) indentation-based complexity metrics per entity.

    Switches the checkout to revision *nameLabel*, runs indentCountStats on every
    file listed in dataFrame['entity'], normalizes the entity names, persists the
    result under '<nameLabel>_indentMetrics', and returns it.

    Args:
        nameLabel: revision label; also the cache-table name prefix.
        dataFrame: must have an 'entity' column of paths relative to
            sourceFilesDirectory (concatenated directly -- no separator added).
        sourceFilesDirectory: checkout directory prefix for entity paths.
        storageConnection: handle passed through to the storage module.
        tabsize: spaces per tab when measuring indentation (default 4).

    Returns:
        The indent-metrics DataFrame, or None when no metrics were produced.
    """
    import storage
    import scmData as scm
    import dataUtilities

    cacheTable = nameLabel + '_indentMetrics'
    if storage.tableExists(cacheTable, storageConnection):
        return storage.readTable(cacheTable, storageConnection)

    scm.gitlog.switchToRevision(sourceFilesDirectory, nameLabel)

    perFileFrames = []
    for relativePath in dataFrame['entity']:
        fullPath = sourceFilesDirectory + relativePath
        stats = indentCountStats(fullPath, tabsize)
        stats['entity'] = fullPath
        perFileFrames.append(stats)

    combined = pd.concat(perFileFrames) if perFileFrames else pd.DataFrame()
    if combined.empty:
        return None

    combined = dataUtilities.formatEntityNames(combined, sourceFilesDirectory)
    storage.writeTable(cacheTable, storageConnection, combined)
    return combined
def codeMetricsTable(nameLabel, dataFrame, sourceFilesDirectory, storageConnection):
    """Compute (or load from cache) per-file code metrics for revision *nameLabel*.

    NOTE(review): this shadows the earlier codeMetricsTable definition in this
    module -- at import time this per-file variant is the one that wins.

    Switches the checkout to *nameLabel*, runs runAnalysisOnFile on every file in
    dataFrame['entity'], drops unused metric columns, normalizes entity names,
    persists the table under '<nameLabel>_codeMetrics', and returns it.

    Args:
        nameLabel: revision label; also the cache-table name prefix.
        dataFrame: must have an 'entity' column of paths relative to
            sourceFilesDirectory (concatenated directly -- no separator added).
        sourceFilesDirectory: checkout directory prefix for entity paths.
        storageConnection: handle passed through to the storage module.

    Returns:
        The metrics DataFrame, or None when dataFrame['entity'] is empty.
    """
    import storage
    import scmData as scm
    import dataUtilities

    cacheTable = nameLabel + '_codeMetrics'
    if storage.tableExists(cacheTable, storageConnection):
        return storage.readTable(cacheTable, storageConnection)

    scm.gitlog.switchToRevision(sourceFilesDirectory, nameLabel)

    perFileResults = [runAnalysisOnFile(sourceFilesDirectory + relativePath)
                      for relativePath in dataFrame['entity']]
    if not perFileResults:
        return None

    combined = dropUnusedCMetrics(pd.concat(perFileResults))
    combined = dataUtilities.formatEntityNames(combined, sourceFilesDirectory)
    storage.writeTable(cacheTable, storageConnection, combined)
    return combined
def makeGitLog(sourceDirectory, filesToInspect, lastTimestamp, previousTimestamp, storageConnection):
    """Capture the git log between two revisions and persist it, once.

    If a log for this revision pair already exists in storage, nothing is done.
    Otherwise `git log <last>...<previous>` (with --numstat, restricted to
    *filesToInspect*) is run in *sourceDirectory* and the raw output is stored
    under 'gitLog_<last>_<previous>'.

    Args:
        sourceDirectory: git working-copy directory to log in.
        filesToInspect: path spec appended after `--` in the git command.
        lastTimestamp: newer revision label (also part of the storage key).
        previousTimestamp: older revision label (also part of the storage key).
        storageConnection: handle passed through to the storage module.

    Returns:
        None. Side effect only: the log file is written to storage.
    """
    import storage

    logName = 'gitLog_' + lastTimestamp + '_' + previousTimestamp
    if storage.tableExists(logName, storageConnection):
        return

    command = "cd %s; git log %s...%s --pretty=format:'[%%h] %%aN %%ad %%s' --date=short --date-order --numstat -- %s" % (
        sourceDirectory, lastTimestamp, previousTimestamp, filesToInspect)
    rawLog = subprocess.check_output(command, shell=True)
    storage.writeFile(logName, storageConnection, rawLog)
def changeMetricsTable(lastTime, previousTime, logFileType, storageConnection):
    """Compute (or load from cache) change metrics from a stored git log.

    Reads the previously stored 'gitLog_<lastTime>_<previousTime>' file into a
    temporary file, runs runAnalysisList over it for each configured entity
    analysis, and merges the resulting CSVs on the 'entity' column. After all
    merges, the duplicated revision-count column pair produced by the first
    merge ('n-revs_x'/'n-revs_y') is collapsed back to a single 'n-revs'.
    The result is persisted under '<lastTime>_changeMetrics' and returned.

    Args:
        lastTime: newer revision label; cache-table name prefix.
        previousTime: older revision label; selects the stored git log.
        logFileType: log format indicator passed to runAnalysisList.
        storageConnection: handle passed through to the storage module.

    Returns:
        The merged change-metrics DataFrame.
    """
    import storage

    cacheTable = lastTime + '_changeMetrics'
    if storage.tableExists(cacheTable, storageConnection):
        return storage.readTable(cacheTable, storageConnection)

    logTable = 'gitLog_' + lastTime + '_' + previousTime
    scmLogPath = storage.readFile(logTable, storageConnection, toTemporaryFile=True)

    analysisOutputs = runAnalysisList(scmLogPath, logFileType, entityAnalysisTypes)
    outputIterator = iter(analysisOutputs)
    merged = pd.read_csv(next(outputIterator))
    for extraOutput in outputIterator:
        merged = merged.merge(pd.read_csv(extraOutput), on='entity')

    # The first merge suffixes the shared revision count as _x/_y; keep one copy.
    merged.drop('n-revs_y', axis=1, inplace=True)
    merged = merged.rename(columns={'n-revs_x': 'n-revs'})

    storage.writeTable(cacheTable, storageConnection, merged)
    return merged