def run(self, hw, part):
    """Instrument every '.code' file of one problem and send it to the unit tester.

    For each file in the problem's AST directory whose name ends in '.code',
    the file is instrumented via self.instrument() and the instrumented
    source is submitted through a CodewebsUnitTestClient. Files that fail
    instrumentation are reported on stdout and skipped.

    hw, part -- identifiers used (via str()) to build the directory names.
    """
    self.projectName = 'Instrumentation'
    self.testClient = CodewebsUnitTestClient(UNITTESTERHOST)
    problemTag = str(hw) + '_' + str(part)
    astdir = os.path.join(FileSystem.getAstDir(), 'ast_' + problemTag)
    outputDir = os.path.join(FileSystem.getDataDir(),
                             'Instrumentation', problemTag)
    srcFiles = [fname for fname in os.listdir(astdir)
                if fname.endswith('.code')]
    total = len(srcFiles)
    for idx, fname in enumerate(srcFiles):
        # Progress report every 100 files.
        if idx % 100 == 0:
            print(str(idx) + ' of ' + str(total))
        # Drops the first four characters and the '.code' suffix;
        # presumably the names look like 'ast_<id>.code' -- TODO confirm.
        astId = fname[4:-5]
        outputPath = os.path.join(outputDir, 'ast_' + astId + '.trace')
        fullname = os.path.join(astdir, fname)
        try:
            instrumentedCodePath = self.instrument(fullname, outputPath)
        except Exception:
            # Narrowed from a bare 'except:' so that Ctrl-C / SystemExit
            # still abort the batch instead of being reported as a failure.
            print('Instrumentation Error')
            continue
        with open(instrumentedCodePath) as fid:
            code = fid.read()
        self.testClient.call(code)
        # The result is not used within this block; wait() is still called
        # so each request is completed before the next one is sent.
        testResult = self.testClient.wait()
def run(self):
    """Write AST ids, submission counts and users for every problem.

    Creates the 'incorrects' data and log directories when missing,
    configures logging to '<logdir>/incorrects/log', then for each problem
    from MLClass.allProblems() writes the incorrect and correct variants of
    each output through the writeASTs / writeNumSubmissions / writeUsers
    helpers.
    """
    outDir = os.path.join(FileSystem.getDataDir(), 'incorrects')
    if not os.path.exists(outDir):
        os.makedirs(outDir)

    logDir = os.path.join(FileSystem.getLogDir(), 'incorrects')
    if not os.path.exists(logDir):
        os.makedirs(logDir)
    logging.basicConfig(filename=os.path.join(logDir, 'log'),
                        format='%(asctime)s %(message)s',
                        datefmt='%m/%d/%Y %I:%M:%S %p',
                        level=logging.INFO)
    logging.info('ListIncorrects()')

    # Each stage handles 'incorrects' first and 'corrects' second.
    labels = ('incorrects', 'corrects')
    for part in MLClass.allProblems():
        print(part)
        logging.info('Problem ' + str(part))

        astsByLabel = {}
        submissionsByLabel = {}
        for label in labels:
            astsByLabel[label], submissionsByLabel[label] = \
                self.getASTids(part, label)

        for label in labels:
            self.writeASTs(astsByLabel[label], label, part, outDir)

        for label in labels:
            self.writeNumSubmissions(submissionsByLabel[label],
                                     label, part, outDir)

        submissionMap = self.getSubmissionMap(part)
        for label in labels:
            self.writeUsers(submissionsByLabel[label], label,
                            part, submissionMap, outDir)
def run(self):
    """Write one 'NumSubmissions_<assn>.txt' file per problem.

    Initializes logging under the name 'DumpNumSubmissions', makes sure the
    matching data directory exists, and calls self.loadNumSubmissions() for
    every (homework, part) pair from MLClass.allProblems().
    """
    taskName = 'DumpNumSubmissions'
    FileSystem.initializeLogging(taskName)

    targetDir = os.path.join(FileSystem.getDataDir(), taskName)
    if not os.path.exists(targetDir):
        os.makedirs(targetDir)

    for hw, part in MLClass.allProblems():
        assn = Assignment(hw, part)
        fileName = 'NumSubmissions_' + str(assn) + '.txt'
        self.loadNumSubmissions(assn, os.path.join(targetDir, fileName))
def findNearestCorrects(self, assn, asts, numASTs, label):
    """Map ASTs to their nearest correct AST.

    assn    -- assignment object providing getTuple().
    asts    -- sequence of AST ids.
    numASTs -- only the first numASTs entries of asts are used as sources
               (ignored when label is 'corrects').
    label   -- 'corrects' or another label (e.g. 'incorrects').

    Returns a dict. For label == 'corrects' every AST in asts maps to
    itself. Otherwise each key of the distance list returned by
    AstNetwork.getDistanceList() maps to the first element of its
    minimum-distance (target, distance) pair.
    """
    if label == 'corrects':
        # A correct AST is its own nearest correct neighbour.
        return dict((ast, ast) for ast in asts)

    corrects = self.getAsts(assn, 'corrects')
    sources = asts[:numASTs]
    # Fix: the original passed the bound method `assn.getTuple` (no call)
    # to AstNetwork, which stores it as `part`.
    part = assn.getTuple()
    distanceMatrix = FileSystem.loadDistanceMatrix(part, False)
    subIdMap = FileSystem.loadSubmissionIdMap(part)
    astNetwork = AstNetwork(part, distanceMatrix, subIdMap)

    D = astNetwork.getDistanceList(sources, corrects)
    nn = {}
    for s in D:
        distances = [dist for (_, dist) in D[s]]
        nn[s] = D[s][np.argmin(distances)][0]
    return nn
def testProblem(hwId, partId):
    """Run the unit tester over every stored submission of one problem.

    Configures DEBUG logging to
    '<logdir>/octave_unittesting/log_<hwId>_<partId>', builds a UnitTester,
    loads the matching Octave rows and, for each one, refreshes the working
    directory, loads the code, times tester.run() and logs a report line.
    Results are not written back to the database.
    """
    problemTag = str(hwId) + '_' + str(partId)
    print('Unit testing homework ' + str(hwId) + ', part ' + str(partId))

    logFile = FileSystem.getLogDir() + '/octave_unittesting/log_' + problemTag
    logging.basicConfig(filename=logFile,
                        format='%(asctime)s %(message)s',
                        datefmt='%m/%d/%Y %I:%M:%S %p',
                        level=logging.DEBUG)

    print('Loading unit testing code')
    tester = UnitTester(hwId, partId)

    print('Loading submissions')
    Submissions = Octave.objects.filter(homework_id=hwId, part_id=partId)

    print('Unit testing started.')
    total = len(Submissions)
    for i, submission in enumerate(Submissions):
        # Run the unit tests for submission i.
        print('Running submission ' + str(i) + ' of ' + str(len(Submissions)))
        tester.refreshWorkingDir()
        tester.loadCode(submission.code)
        with Timer() as t:
            output, correct = tester.run()
        print('\tRequest took %.03f sec.' % t.interval)
        # Committing output/correct to the database is intentionally
        # disabled here; only the log receives the outcome.
        logging.debug(
            report(hwId, partId, i, total, correct, submission.id,
                   t.interval))
def run(self):
    """Cluster each part's graph and record nearest corrects per cluster.

    Logging goes to '<logdir>/cluster/log'. For every assignment from
    self.getAllParts() and for the labels 'corrects' then 'incorrects', the
    graph is loaded, its edges filtered, and the common clusters extracted;
    each cluster is then sorted by connectivity, its nearest corrects are
    computed, and the results are written via self.writeResults().
    """
    logDir = os.path.join(FileSystem.getLogDir(), 'cluster')
    if not os.path.exists(logDir):
        os.makedirs(logDir)
    logging.basicConfig(filename=os.path.join(logDir, 'log'),
                        format='%(asctime)s %(message)s',
                        datefmt='%m/%d/%Y %I:%M:%S %p',
                        level=logging.INFO)

    for assn in self.getAllParts():
        for label in ['corrects', 'incorrects']:
            logging.info('Cluster.run(): (hw,part): '
                         + str(assn) + ', ' + label)
            graph = self.loadGraph(assn, THRESHOLD, label)
            filtered = self.filterEdges(graph, FILTERTHRESHOLD)
            clusters = self.commonClusters(filtered, NUMCLUSTERS)
            for Cidx, C in enumerate(clusters):
                asts = self.sortByConnectivity(C)
                numSubmissions = self.numSubmissionsInCluster(C)
                logging.info('--------------------')
                # Logged as: vertex count, edge count, submission count.
                sizeParts = [str(len(C.vs)), str(len(C.es)),
                             str(numSubmissions)]
                logging.info('Clustersize: ' + ' '.join(sizeParts))
                logging.info('Finding nearest corrects.')
                nn = self.findNearestCorrects(assn, asts, NUMASTS, label)
                self.writeResults(assn, Cidx, asts, NUMASTS,
                                  numSubmissions, nn, label)
                logging.info('Done.')
def initializeLog(self):
    """Point the root logger at '<logdir>/PrecomputeNN/log' (INFO level).

    The log directory is created first when it does not exist.
    """
    targetDir = os.path.join(FileSystem.getLogDir(), 'PrecomputeNN')
    if not os.path.exists(targetDir):
        os.makedirs(targetDir)
    settings = {
        'filename': os.path.join(targetDir, 'log'),
        'format': '%(asctime)s %(message)s',
        'datefmt': '%m/%d/%Y %I:%M:%S %p',
        'level': logging.INFO,
    }
    logging.basicConfig(**settings)
def readTemplate(self):
    """Load template.html and split it around its two placeholders.

    The text is cut at '{{hierarchy}}' and the remainder at '{{script}}';
    the three pieces are stored in self.template under the keys 'before',
    'middle' and 'after'. Each placeholder must occur exactly once in the
    part being split, otherwise the tuple unpacking raises ValueError.
    """
    templatePath = os.path.join(FileSystem.getDataDir(), 'equivalence',
                                'templates', 'template.html')
    with open(templatePath) as fid:
        text = fid.read()

    head, rest = text.split('{{hierarchy}}')
    body, tail = rest.split('{{script}}')
    self.template = dict(before=head, middle=body, after=tail)
def getOutputFilePath(self, assn, threshold, label):
    """Return '<datadir>/incorrects/<label>Graph.<assn>.sparse<threshold>.gml'.

    The 'incorrects' directory is created when it does not exist yet.
    """
    targetDir = os.path.join(FileSystem.getDataDir(), 'incorrects')
    if not os.path.exists(targetDir):
        os.makedirs(targetDir)
    graphName = label + 'Graph.' + str(assn)
    graphName += '.sparse' + str(threshold) + '.gml'
    return os.path.join(targetDir, graphName)
def getResultPartPath(self, statName, part):
    """Return the CSV path for one statistic of one problem part.

    The path is
    '<results>/syntaxStatistics/<statName>/<statName>_<part[0]>_<part[1]>.csv';
    the statistic's directory is created if missing.
    """
    statDir = os.path.join(FileSystem.getResultsDir(),
                           'syntaxStatistics', statName)
    csvName = '_'.join([statName, str(part[0]), str(part[1])]) + '.csv'
    if not os.path.exists(statDir):
        os.makedirs(statDir)
    return os.path.join(statDir, csvName)
def __init__(self, hwId, partId):
    """Prepare data, working directory and helper scripts for one problem.

    hwId, partId -- identifiers stored on the instance and used (via str())
                    to build the data and working directory names.

    Asserts that the data directory
    '<datadir>/octave_unittest/mlclass-ex<hwId>' exists.
    """
    self.hwId = hwId
    self.partId = partId

    self.dataDir = os.path.join(FileSystem.getDataDir(),
                                'octave_unittest/mlclass-ex' + str(self.hwId))
    assert os.path.exists(self.dataDir)
    self._loadCorrect()

    workName = 'unitTesting_' + str(self.hwId) + '_' + str(self.partId)
    self.workingDir = os.path.join(FileSystem.getWorkingDir(), workName)
    self.createWorkingDir()
    self.refreshWorkingDir()

    # Generate the two helper scripts before asking for the test file name.
    self._writeUnitTestScript()
    self._writeUnitTestFilesScript()
    self.unitTestFile = self.workingDir + '/' + self.getUnitTestFile()
def _writeUnitTestFilesScript(self):
    """Write '<workingDir>/unittestfilesscript.sh' and mark it executable.

    The script starts with a shebang for the interpreter returned by
    FileSystem.getOctave(), adds the working directory to the path, calls
    unittest(<partId>, true) and prints the returned name. The path is
    stored in self.fileScriptName.
    """
    self.fileScriptName = self.workingDir + '/unittestfilesscript.sh'
    # 'with' guarantees the handle is closed even if a write fails
    # (the original open()/close() pair leaked it in that case).
    with open(self.fileScriptName, 'wt') as fid:
        fid.write('#! ' + FileSystem.getOctave() + ' -qf\n')
        fid.write('addpath("' + self.workingDir + '");\n')
        fid.write('fname = unittest(' + str(self.partId) + ', true);\n')
        # No newline after printf: the emitted text is kept byte-for-byte,
        # so 'return' follows on the same script line.
        fid.write('printf(fname);')
        fid.write('return\n')
    self._makeExecutable(self.fileScriptName)
def save(self, statName, astNetwork, matrix):
    """Save matrix as comma-separated text for one statistic and part.

    The file is
    '<results>/interplayStatistics/<statName>/<statName>_<p0>_<p1>.csv',
    where (p0, p1) are the first two entries of astNetwork.part. The
    statistic's directory is created if missing.
    """
    statDir = os.path.join(FileSystem.getResultsDir(),
                           'interplayStatistics', statName)
    hw, part = astNetwork.part[0], astNetwork.part[1]
    csvName = statName + '_' + str(hw) + '_' + str(part) + '.csv'
    if not os.path.exists(statDir):
        os.makedirs(statDir)
    numpy.savetxt(os.path.join(statDir, csvName), matrix, delimiter=",")
def load(self, statName, astNetwork):
    """Load a previously saved statistic matrix for astNetwork's part.

    Reads
    '<results>/interplayStatistics/<statName>/<statName>_<p0>_<p1>.csv'
    with numpy.loadtxt.

    Raises Exception('File not found') when the file does not exist.
    """
    hw, part = astNetwork.part[0], astNetwork.part[1]
    csvName = statName + '_' + str(hw) + '_' + str(part) + '.csv'
    csvPath = os.path.join(FileSystem.getResultsDir(),
                           'interplayStatistics', statName, csvName)
    if os.path.exists(csvPath):
        return numpy.loadtxt(csvPath, delimiter=",")
    raise Exception('File not found')
def getOutputPath(self, assn, Cidx, label):
    """Return the result file path for one cluster.

    The path is
    '<results>/clusters/connectedComponents/<label>cluster_<assn>_<Cidx>.txt';
    both directory levels are created when missing.
    """
    clustersDir = os.path.join(FileSystem.getResultsDir(), 'clusters')
    componentsDir = os.path.join(clustersDir, 'connectedComponents')
    # Parent first, then child, mirroring the two separate checks.
    for directory in (clustersDir, componentsDir):
        if not os.path.exists(directory):
            os.makedirs(directory)
    baseName = label + 'cluster_' + str(assn) + '_' + str(Cidx) + '.txt'
    return os.path.join(componentsDir, baseName)
def getAsts(self, assn, label):
    """Read the AST ids listed in '<datadir>/incorrects/<label>_<assn>.txt'.

    Each non-blank line is parsed with int(). Returns a Set of the ids.
    Raises ValueError if a non-blank line is not an integer.
    """
    dataDir = FileSystem.getDataDir()
    path = os.path.join(dataDir, 'incorrects',
                        label + '_' + str(assn) + '.txt')
    # 'with' closes the file; the original never closed its handle.
    with open(path) as astFile:
        # Blank lines (e.g. a trailing empty line) are skipped instead of
        # crashing int().
        astList = [int(line) for line in astFile if line.strip()]
    return Set(astList)
def instrument(self, fname, outputPath):
    """Run the Java injector on fname and return the instrumented file path.

    The injector is invoked as
    'java -cp INJECTPATH INJECTEXEC <fname> <tmp.m> <outputPath>' through
    RunExternal, where tmp.m lives in '<workingdir>/<projectName>/'.

    Raises Exception('Instrumentation Error!') on a non-zero error code.
    """
    instrumentedPath = os.path.join(FileSystem.getWorkingDir(),
                                    self.projectName, 'tmp.m')
    javaArgs = ['java', '-cp', INJECTPATH, INJECTEXEC,
                fname, instrumentedPath, outputPath]
    runner = RunExternal(javaArgs, 60, True)
    runner.run()
    if runner.getErrorCode() == 0:
        return instrumentedPath
    raise Exception('Instrumentation Error!')
def run(self):
    """Compute and write the nearest-neighbour map for every part.

    For each (homework, part) from self.getAllParts(): load the correct and
    incorrect ASTs, build an AstNetwork from the distance matrix and the
    submission id map, compute the map with self.getNN(), and write it to
    '<datadir>/nearestNeighbors/NNmap_<assn>.txt'.
    """
    self.initializeLog()
    for hw, part in self.getAllParts():
        assn = Assignment(hw, part)
        logging.info('PrecomputeNN (hw,part): ' + str(assn))

        corrects = self.getASTs(assn, 'corrects')
        incorrects = self.getASTs(assn, 'incorrects')

        matrix = FileSystem.loadDistanceMatrix(assn.getTuple(), False)
        idMap = FileSystem.loadSubmissionIdMap(assn.getTuple())
        network = AstNetwork(assn.getTuple(), matrix, idMap)
        NNmap = self.getNN(corrects, incorrects, network)

        # The directory is only created once there is something to write.
        nnDir = os.path.join(FileSystem.getDataDir(), 'nearestNeighbors')
        if not os.path.exists(nnDir):
            os.makedirs(nnDir)
        nnFile = 'NNmap_' + str(assn) + '.txt'
        self.writeNN(os.path.join(nnDir, nnFile), NNmap)
def _writeUnitTestScript(self):
    """Write '<workingDir>/unittestscript.sh' and mark it executable.

    The script starts with a shebang for the interpreter returned by
    FileSystem.getOctave(), adds the working directory to the path, calls
    unittest(<partId>, false), prints its output followed by a 'result'
    marker line. The path is stored in self.scriptName.
    """
    self.scriptName = self.workingDir + '/unittestscript.sh'
    # 'with' guarantees the handle is closed even if a write fails
    # (the original open()/close() pair leaked it in that case).
    with open(self.scriptName, 'wt') as fid:
        fid.write('#! ' + FileSystem.getOctave() + ' -qf\n')
        fid.write('addpath("' + self.workingDir + '");\n')
        fid.write('output = unittest(' + str(self.partId) + ', false);\n')
        fid.write('printf(output);\n')
        # The backslash-n is written literally; the script itself expands it.
        fid.write("printf('\\nresult');\n")
        fid.write('return\n')
    self._makeExecutable(self.scriptName)
def __init__(self, part, matrixFile, idMap):
    """Store the network inputs and set up logging.

    part       -- problem identifier, kept as self.part and logged via str().
    matrixFile -- kept as self.matrixFile.
    idMap      -- kept as self.subIdMap.

    Logging is directed to '<logdir>/astnetwork/log' at INFO level; the
    directory is created if missing.
    """
    self.matrixFile = matrixFile
    self.subIdMap = idMap
    self.part = part
    self.stats = {}

    networkLogDir = os.path.join(FileSystem.getLogDir(), 'astnetwork')
    if not os.path.exists(networkLogDir):
        os.makedirs(networkLogDir)
    logging.basicConfig(filename=os.path.join(networkLogDir, 'log'),
                        format='%(asctime)s %(message)s',
                        datefmt='%m/%d/%Y %I:%M:%S %p',
                        level=logging.INFO)
    logging.info('AstNetwork Initialization: (hw,part): ' + str(self.part))
def GetClusterPath(self, assn, clusterId, label):
    """Return the path of an existing cluster result file.

    The path is '<results>/clusters/connectedComponents/
    <label>cluster_<assn>_<clusterId>.txt'. When the file does not exist,
    an error is printed and the process exits with status 1.
    """
    baseName = label + 'cluster_' + str(assn) + '_' + str(clusterId) + '.txt'
    fullpath = os.path.join(FileSystem.getResultsDir(), 'clusters',
                            'connectedComponents', baseName)
    if os.path.exists(fullpath):
        return fullpath
    print('Error: file was not found')
    print('\tpath: ' + fullpath)
    sys.exit(1)
def getGraph(self, astSet, assn, threshold, label): part = assn.getTuple() #filteredGraph = nx.Graph() filteredGraph = igraph.Graph() distanceMatrix = FileSystem.loadDistanceMatrix(part, False) subIdMap = FileSystem.loadSubmissionIdMap(part) lookup = {} for key, idx in zip(subIdMap, range(len(subIdMap))): if int(key) in astSet: numStudents = len(subIdMap[key]) #filteredGraph.add_node(key, {'weight': numStudents}) filteredGraph.add_vertex(label=key, weight=numStudents) lookup[key] = filteredGraph.vs.find(label=int(key)) row = 0 toAdd = {} while True: logging.info('assn: ' + str(assn) + ', ' + label + ', row: ' + str(row)) line = distanceMatrix.readline() if not line: break if not row in astSet: row += 1 continue rowValues = map(int, line.strip().split()) for col in range(row + 1, len(rowValues)): if not col in astSet: continue value = rowValues[col] if value >= 0 and value <= threshold: toAdd[(lookup[row], lookup[col])] = value #filteredGraph.add_edge(row, col, {'edits': value}) row += 1 logging.info('Oh... one more thing.') filteredGraph.add_edges(toAdd.keys()) filteredGraph.es['edits'] = toAdd.values() return filteredGraph
def test(self):
    """Print the top-left 5x5 block of the 'dist_1_1.sparse50.pickle' distances.

    Each of the five rows is printed as a list; pairs without a stored
    distance are shown as 0.
    """
    databaseDir = FileSystem.getDistanceMatrixDir()
    newFileName = 'dist_1_1.sparse50.pickle'
    newPath = os.path.join(databaseDir, newFileName)
    d = Distances(newPath)
    for i in range(5):
        values = []
        for j in range(5):
            value = 0
            if d.hasDistance(i, j):
                value = d.getDistance(i, j)
            values.append(value)
        # Function-call form: same output as the Python 2 print statement
        # for a single argument, and valid syntax on Python 3.
        print(values)
def saveOverallStat(self, statName, values):
    """Save raw values and their histogram as two CSV files.

    Files are written to '<results>/syntaxStatistics/overallStats/' as
    '<statName>Values.csv' and '<statName>Hist.csv'. The histogram uses
    HISTOGRAM_BUCKETS over the range (0, 1); each histogram row is
    [left bin edge, count].
    """
    statsDir = os.path.join(FileSystem.getResultsDir(),
                            'syntaxStatistics', 'overallStats')
    if not os.path.exists(statsDir):
        os.makedirs(statsDir)

    counts, edges = numpy.histogram(values, HISTOGRAM_BUCKETS, range=(0, 1))
    # edges has one more entry than counts; zip stops at the shorter one,
    # so each count is paired with the left edge of its bin.
    histogramRows = [[edge, count] for edge, count in zip(edges, counts)]

    histogramPath = os.path.join(statsDir, statName + 'Hist.csv')
    valuesPath = os.path.join(statsDir, statName + 'Values.csv')
    numpy.savetxt(valuesPath, values, delimiter=",")
    numpy.savetxt(histogramPath, histogramRows, delimiter=",")
def run(self, assn, threshold):
    """Build and save the 'incorrects' and 'corrects' graphs of one assignment.

    Logging goes to '<logdir>/MakeGraph/log'. For each label the ASTs are
    loaded, the graph is built with self.getGraph() and saved to the path
    returned by self.getOutputFilePath().
    """
    graphLogDir = os.path.join(FileSystem.getLogDir(), 'MakeGraph')
    if not os.path.exists(graphLogDir):
        os.makedirs(graphLogDir)
    logging.basicConfig(filename=os.path.join(graphLogDir, 'log'),
                        format='%(asctime)s %(message)s',
                        datefmt='%m/%d/%Y %I:%M:%S %p',
                        level=logging.INFO)

    for label in ('incorrects', 'corrects'):
        astIds = self.getAsts(assn, label)
        graph = self.getGraph(astIds, assn, threshold, label)
        target = self.getOutputFilePath(assn, threshold, label)
        logging.info('write graph: ' + target)
        graph.save(target)
        logging.info('done.')
def __init__(self, assn):
    """Index the equivalence classes stored on disk for one assignment.

    Scans '<datadir>/equivalence/equivalence_<assn>/'. Every entry whose
    name parses as an int is a level; inside each level directory, the part
    of each entry name before the first '.' is parsed as a class id.

    Populates:
      self.levels   -- list of level numbers (ints).
      self.classes  -- dict level -> list of class ids found in that level.
      self.levelMap -- dict class id -> level.

    Entries whose name (or prefix) is not an integer are ignored.
    """
    self.equivDir = os.path.join(FileSystem.getDataDir(),
                                 'equivalence', 'equivalence_' + str(assn))
    self.levels = []
    for d in os.listdir(self.equivDir):
        try:
            self.levels.append(int(d))
        except ValueError:
            pass

    self.classes = {}
    self.levelMap = {}
    for level in self.levels:
        for d in os.listdir(os.path.join(self.equivDir, str(level))):
            # Fix: the original converted the prefix outside its try block,
            # so its 'except ValueError' could never fire and a stray
            # non-numeric file name aborted the constructor.
            try:
                classId = int(d.split('.')[0])
            except ValueError:
                continue
            self.levelMap[classId] = level
            self.classes.setdefault(level, []).append(classId)
def makeSparse(self, hwPart):
    """Convert a dense text distance matrix into a sparse pickled Distances.

    hwPart -- pair whose two entries name the input 'dist_<h>_<p>.txt' in
              the distance-matrix directory.

    Only entries right of the diagonal (col > row) whose value is not -1
    and is <= MAX_VALUE are kept. The result is saved next to the input as
    'dist_<h>_<p>.sparse<MAX_VALUE>.pickle'.
    """
    databaseDir = FileSystem.getDistanceMatrixDir()
    hwString = str(hwPart[0]) + '_' + str(hwPart[1])
    fileName = 'dist_' + hwString + '.txt'
    newFileName = 'dist_' + hwString + '.sparse' + str(MAX_VALUE) + '.pickle'
    newPath = os.path.join(databaseDir, newFileName)

    d = Distances()
    # try/finally releases the file and the mapping; the original leaked both.
    matrixFile = open(os.path.join(databaseDir, fileName))
    try:
        matrixMap = mmap.mmap(matrixFile.fileno(), 0, access=mmap.ACCESS_READ)
        try:
            row = 0
            while True:
                line = matrixMap.readline()
                if not line:
                    break
                # A real list: it is indexed and measured below, which a
                # lazy map() object would not support.
                rowValues = [int(v) for v in line.strip().split()]
                for col in range(row + 1, len(rowValues)):
                    value = rowValues[col]
                    if value != -1 and value <= MAX_VALUE:
                        d.add(row, col, value)
                row += 1
        finally:
            matrixMap.close()
    finally:
        matrixFile.close()
    d.save(newPath)
def getGraphPath(self, assn, threshold, label):
    """Return '<datadir>/incorrects/<label>Graph.<assn>.sparse<threshold>.gml'.

    Unlike a path helper that prepares its directory, this one only builds
    the path; nothing is created on disk.
    """
    graphName = label + 'Graph.' + str(assn)
    graphName += '.sparse' + str(threshold) + '.gml'
    return os.path.join(FileSystem.getDataDir(), 'incorrects', graphName)
# Build script for the 'PyMatch' C++ extension module.
import sys
import os.path
# Make the repository root importable; this must run before the
# 'src.util' import below. The path is relative to the current working
# directory.
sys.path.append(os.path.abspath('../../../'))
from distutils.core import setup, Extension
from src.util.FileSystem import FileSystem

# Locations of the SimpleJSON headers and object files under the directory
# returned by FileSystem.getExtDir().
extDir = FileSystem.getExtDir()
jsonPath = os.path.join(extDir, 'SimpleJSON-master/src')
jsonObj1Path = os.path.join(extDir, 'SimpleJSON-master/obj/JSON.o')
jsonObj2Path = os.path.join(extDir, 'SimpleJSON-master/obj/JSONValue.o')

# The two SimpleJSON object files are linked in directly as extra objects.
module1 = Extension(
    'PyMatch',
    sources = ['PyMatch.cpp', 'Match.cpp'],
    extra_objects = [jsonObj1Path, jsonObj2Path],
    include_dirs = ['.', jsonPath],
    library_dirs = [],
    libraries = [],
    extra_compile_args = ['-fPIC']
)

setup (name = 'PyMatch',version = '0.1',description = 'Matches octave code.',ext_modules = [module1], packages = [])
print('Result: ' + resultstr[sourceResult]) print('------------------------') Printer.mask(M.source, diffSourceMap) print('\nProblem: ' + str(assn) + ', AST #' + str(targetId)) print('Result: ' + resultstr[targetResult]) print('------------------------') Printer.mask(M.target, diffTargetMap) if __name__ == '__main__': try: hwId = int(sys.argv[1]) partId = int(sys.argv[2]) sourceId = int(sys.argv[3]) if len(sys.argv) > 4: targetId = int(sys.argv[4]) else: print('asdf') NNmap = FileSystem.loadNearestNeighbors((hwId, partId)) targetId = NNmap[sourceId][0] except: print('Usage: python compareCode.py hwId partId sourceId [targetId]') sys.exit(1) run(Assignment(hwId, partId), sourceId, targetId) #hwId = 3 #partId = 3 #sourceId = 0 #targetId = 7221