def mainOptions(options, convertFunction=lambda x: x):
    """Write a combined config file and run the ``trainClassifier`` binary on it.

    The config file is produced by ``combineConfigs`` and deleted again
    afterwards unless ``options.debug`` is set.

    Args:
        options: Settings object. The attributes read here are ``saveFile``,
            ``targetBase``, ``student``, ``numSource``, ``useSource``,
            ``sourceBase``, ``otherStudents``, ``saveConfigFilename``,
            ``fracSourceData``, ``debug`` and ``catchOutput``.
        convertFunction: Passed through unchanged to ``combineConfigs``.

    Returns:
        The ``(output, error)`` pair from ``Popen.communicate()`` when
        ``options.catchOutput`` is true, otherwise ``None``. Only stdout is
        piped, so ``error`` is ``None``.

    Raises:
        subprocess.CalledProcessError: If the binary exits with a non-zero
            status and ``options.catchOutput`` is false. The exit status is
            not checked when the output is captured.
    """
    # Bound before the ``try`` so the cleanup can tell whether the config
    # file was ever created (previously a failure in combineConfigs caused a
    # NameError in the cleanup that was silently swallowed).
    filename = None
    try:
        filename = combineConfigs(options.saveFile, options, convertFunction)
        targetData = getFilename(options.targetBase, options.student, TRAIN)
        if (options.numSource == 0) or not options.useSource:
            sourceData = []
        else:
            sourceData = [
                getFilename(options.sourceBase, s, TRAIN)
                for s in options.otherStudents
            ]

        # Single-argument print: same output whether ``print`` is the
        # Python 2 statement or the Python 3 function.
        print('save config will be at %s' % (options.saveConfigFilename,))
        cmd = [
            'bin/%s/trainClassifier' % getArch(),
            filename,
            options.saveFile,
            targetData,
        ] + sourceData
        if options.fracSourceData is not None:
            cmd += ['--fracSourceData', str(options.fracSourceData)]
        if options.debug:
            print(' '.join(cmd))

        if options.catchOutput:
            p = subprocess.Popen(cmd, stdout=subprocess.PIPE)
            output, error = p.communicate()
        else:
            subprocess.check_call(cmd)
    finally:
        # In debug mode the config file is kept for inspection.
        if filename is not None and not options.debug:
            try:
                os.remove(filename)
            except OSError:
                # Best-effort cleanup: a file that is already gone or cannot
                # be deleted must not mask the real outcome of the run.
                pass
    if options.catchOutput:
        return output, error
def main(targetDir, sourceDir, inPrefix, outPrefix, studentInd, origNumSource):
    """Run the ``boostWeights`` binary per student and extract the resulting tree.

    For each selected student, ``<targetDir>/desc/<inPrefix>-<student>.desc``
    is parsed with ``parseDesc``. Every value of the parsed mapping is
    multiplied by ``origNumSource / (4 * 50000.0)`` and passed, together with
    its key, as extra arguments to the binary. The binary's stdout is written
    to ``<targetDir>/desc/<outPrefix>-<student>.desc``, which is then handed
    to ``extractTree`` along with
    ``<targetDir>/weighted/<outPrefix>-<student>.weka``.

    Args:
        targetDir: Directory holding the ``desc`` and ``weighted`` subfolders;
            also passed to the binary.
        sourceDir: Passed to the binary.
        inPrefix: Filename prefix of the input description files.
        outPrefix: Filename prefix of the generated files.
        studentInd: Index of the only student to process, or ``None`` to
            process all of them.
        origNumSource: Numerator of the scaling factor applied to the values.

    Raises:
        subprocess.CalledProcessError: If the binary exits with a non-zero
            status.
    """
    inFilename = inPrefix + '-%s.desc'
    outDesc = outPrefix + '-%s.desc'
    outTree = outPrefix + '-%s.weka'
    studentFile = 'data/newStudents29.txt'
    # NOTE(review): the 4 * 50000.0 denominator is hard-coded; its meaning is
    # not visible from this function.
    factor = origNumSource / (4 * 50000.0)
    students = getUniqueStudents(studentFile)
    for i, student in enumerate(students):
        if (studentInd is not None) and (i != studentInd):
            continue
        filename = os.path.join(targetDir, 'desc', inFilename % student)
        res = parseDesc(filename)
        if res is None:
            sys.stderr.write('SKIPPING %s\n' % (student,))
            continue
        cmd = ['bin/%s/boostWeights' % getArch(), student, targetDir, sourceDir]
        for k, v in res.items():
            cmd += [k, str(v * factor)]
        outDescPath = os.path.join(targetDir, 'desc', outDesc % student)
        outTreePath = os.path.join(targetDir, 'weighted', outTree % student)
        # Close the output file deterministically before extractTree is
        # given its path.
        with open(outDescPath, 'w') as descOut:
            subprocess.check_call(cmd, stdout=descOut)
        extractTree(outDescPath, outTreePath)
def mainOptions(options, convertFunction=lambda x: x):
    """Write a combined config file and run the ``trainClassifier`` binary on it.

    The config file is produced by ``combineConfigs`` and deleted again
    afterwards unless ``options.debug`` is set.

    Args:
        options: Settings object. The attributes read here are ``saveFile``,
            ``targetBase``, ``student``, ``numSource``, ``useSource``,
            ``sourceBase``, ``otherStudents``, ``saveConfigFilename``,
            ``fracSourceData``, ``debug`` and ``catchOutput``.
        convertFunction: Passed through unchanged to ``combineConfigs``.

    Returns:
        The ``(output, error)`` pair from ``Popen.communicate()`` when
        ``options.catchOutput`` is true, otherwise ``None``. Only stdout is
        piped, so ``error`` is ``None``.

    Raises:
        subprocess.CalledProcessError: If the binary exits with a non-zero
            status and ``options.catchOutput`` is false. The exit status is
            not checked when the output is captured.
    """
    # Bound before the ``try`` so the cleanup can tell whether the config
    # file was ever created (previously a failure in combineConfigs caused a
    # NameError in the cleanup that was silently swallowed).
    filename = None
    try:
        filename = combineConfigs(options.saveFile, options, convertFunction)
        targetData = getFilename(options.targetBase, options.student, TRAIN)
        if (options.numSource == 0) or not options.useSource:
            sourceData = []
        else:
            sourceData = [
                getFilename(options.sourceBase, s, TRAIN)
                for s in options.otherStudents
            ]

        # Single-argument print: same output whether ``print`` is the
        # Python 2 statement or the Python 3 function.
        print("save config will be at %s" % (options.saveConfigFilename,))
        cmd = [
            "bin/%s/trainClassifier" % getArch(),
            filename,
            options.saveFile,
            targetData,
        ] + sourceData
        if options.fracSourceData is not None:
            cmd += ["--fracSourceData", str(options.fracSourceData)]
        if options.debug:
            print(" ".join(cmd))

        if options.catchOutput:
            p = subprocess.Popen(cmd, stdout=subprocess.PIPE)
            output, error = p.communicate()
        else:
            subprocess.check_call(cmd)
    finally:
        # In debug mode the config file is kept for inspection.
        if filename is not None and not options.debug:
            try:
                os.remove(filename)
            except OSError:
                # Best-effort cleanup: a file that is already gone or cannot
                # be deleted must not mask the real outcome of the run.
                pass
    if options.catchOutput:
        return output, error
def main(targetDir, sourceDir, inPrefix, outPrefix, studentInd, origNumSource):
    """Run the ``boostWeights`` binary per student and extract the resulting tree.

    For each selected student, ``<targetDir>/desc/<inPrefix>-<student>.desc``
    is parsed with ``parseDesc``. Every value of the parsed mapping is
    multiplied by ``origNumSource / (4 * 50000.0)`` and passed, together with
    its key, as extra arguments to the binary. The binary's stdout is written
    to ``<targetDir>/desc/<outPrefix>-<student>.desc``, which is then handed
    to ``extractTree`` along with
    ``<targetDir>/weighted/<outPrefix>-<student>.weka``.

    Args:
        targetDir: Directory holding the ``desc`` and ``weighted`` subfolders;
            also passed to the binary.
        sourceDir: Passed to the binary.
        inPrefix: Filename prefix of the input description files.
        outPrefix: Filename prefix of the generated files.
        studentInd: Index of the only student to process, or ``None`` to
            process all of them.
        origNumSource: Numerator of the scaling factor applied to the values.

    Raises:
        subprocess.CalledProcessError: If the binary exits with a non-zero
            status.
    """
    inFilename = inPrefix + '-%s.desc'
    outDesc = outPrefix + '-%s.desc'
    outTree = outPrefix + '-%s.weka'
    studentFile = 'data/newStudents29.txt'
    # NOTE(review): the 4 * 50000.0 denominator is hard-coded; its meaning is
    # not visible from this function.
    factor = origNumSource / (4 * 50000.0)
    students = getUniqueStudents(studentFile)
    for i, student in enumerate(students):
        if (studentInd is not None) and (i != studentInd):
            continue
        filename = os.path.join(targetDir, 'desc', inFilename % student)
        res = parseDesc(filename)
        if res is None:
            sys.stderr.write('SKIPPING %s\n' % (student,))
            continue
        cmd = ['bin/%s/boostWeights' % getArch(), student, targetDir, sourceDir]
        for k, v in res.items():
            cmd += [k, str(v * factor)]
        outDescPath = os.path.join(targetDir, 'desc', outDesc % student)
        outTreePath = os.path.join(targetDir, 'weighted', outTree % student)
        # Close the output file deterministically before extractTree is
        # given its path.
        with open(outDescPath, 'w') as descOut:
            subprocess.check_call(cmd, stdout=descOut)
        extractTree(outDescPath, outTreePath)
def main(args=sys.argv[1:]):
    """Run the ``runClassifier`` binary with ``--notrain`` on a student's data file.

    Args:
        args: Command-line arguments handed to ``parseArgs``; defaults to the
            ``sys.argv[1:]`` list captured when this function was defined.

    Raises:
        subprocess.CalledProcessError: If the binary exits with a non-zero
            status.
    """
    opts = parseArgs(args)
    # NOTE(review): the file used for testing is resolved with the TRAIN
    # constant -- confirm this is intended.
    dataPath = getFilename(opts.testBase, opts.student, TRAIN)
    binary = 'bin/%s/runClassifier' % getArch()
    subprocess.check_call(
        [binary, opts.saveConfigFilename, dataPath, str(opts.numTest), '--notrain']
    )
def main(targetDir, sourceDir, destDir, prefix, studentInd):
    """Run the ``boostTest`` binary per student and extract the resulting tree.

    Reads the module-level names ``studentFile`` and ``sourceData`` (neither
    is defined in this function). The output prefix is extended with
    ``-using<sourceData>Source``, or ``-usingAllSource`` when ``sourceData``
    is not greater than zero. For each selected student the binary's stdout
    is written to ``<destDir>/desc/<prefix>-<student>.desc``, which is then
    passed to ``extractTree`` together with
    ``<destDir>/weighted/<prefix>-<student>.weka``.

    Args:
        targetDir: Passed to the binary.
        sourceDir: Passed to the binary.
        destDir: Directory holding the ``desc`` and ``weighted`` subfolders.
        prefix: Base filename prefix of the generated files.
        studentInd: Index of the only student to process, or ``None`` to
            process all of them.

    Raises:
        subprocess.CalledProcessError: If the binary exits with a non-zero
            status.
    """
    students = getUniqueStudents(studentFile)
    prefix = prefix + '-using%sSource' % (sourceData if sourceData > 0 else 'All')
    for i, student in enumerate(students):
        if (studentInd is not None) and (i != studentInd):
            continue
        print('-------------------')
        print(student)
        print('-------------------')
        cmd = [
            'bin/%s/boostTest' % getArch(),
            student,
            studentFile,
            targetDir,
            sourceDir,
            str(sourceData),
        ]
        descFile = os.path.join(destDir, 'desc', prefix + '-' + student + '.desc')
        resultFile = os.path.join(destDir, 'weighted', prefix + '-' + student + '.weka')
        # Close the output file deterministically before extractTree is
        # given its path.
        with open(descFile, 'w') as descOut:
            subprocess.check_call(cmd, stdout=descOut)
        extractTree(descFile, resultFile)
def main(targetDir, sourceDir, destDir, prefix, maxNumBoosts, numTargetInstances, studentInd):
    """Run the ``boostGivenOrder`` binary per student and extract the resulting tree.

    Reads the module-level name ``studentFile`` (not defined in this
    function). For each selected student the binary is given
    ``<destDir>/desc/combined-<student>.desc`` as its order file; its stdout
    is written to ``<destDir>/desc/<prefix>-<student>.desc``, which is then
    passed to ``extractTree`` together with
    ``<destDir>/weighted/<prefix>-<student>.weka``.

    Args:
        targetDir: Passed to the binary.
        sourceDir: Passed to the binary.
        destDir: Directory holding the ``desc`` and ``weighted`` subfolders.
        prefix: Filename prefix of the generated files.
        maxNumBoosts: Passed to the binary as a string.
        numTargetInstances: Passed to the binary as a string.
        studentInd: Index of the only student to process, or ``None`` to
            process all of them.

    Raises:
        subprocess.CalledProcessError: If the binary exits with a non-zero
            status.
    """
    students = getUniqueStudents(studentFile)
    for i, student in enumerate(students):
        if (studentInd is not None) and (i != studentInd):
            continue
        print('-------------------')
        print(student)
        print('-------------------')
        orderFile = os.path.join(destDir, 'desc', 'combined-%s.desc' % student)
        cmd = [
            'bin/%s/boostGivenOrder' % getArch(),
            student,
            orderFile,
            targetDir,
            sourceDir,
            str(maxNumBoosts),
            str(numTargetInstances),
        ]
        descFile = os.path.join(destDir, 'desc', prefix + '-' + student + '.desc')
        resultFile = os.path.join(destDir, 'weighted', prefix + '-' + student + '.weka')
        # Close the output file deterministically before extractTree is
        # given its path.
        with open(descFile, 'w') as descOut:
            subprocess.check_call(cmd, stdout=descOut)
        extractTree(descFile, resultFile)
def main(targetDir, sourceDir, destDir, numTargetInstances, jobInd):
    """Run the ``boostIndependent`` binary for (target, source) student pairs.

    Reads the module-level name ``studentFile`` (not defined in this
    function). Every ordered pair of unequal students is numbered from zero
    in iteration order. For each selected pair the binary's stdout is written
    to ``<destDir>/boostIndependent/<target>-<source>.desc``.

    Args:
        targetDir: Passed to the binary.
        sourceDir: Passed to the binary.
        destDir: Directory holding the ``boostIndependent`` subfolder.
        numTargetInstances: Passed to the binary as a string.
        jobInd: Number of the only pair to process, or ``None`` to process
            all pairs.

    Raises:
        subprocess.CalledProcessError: If the binary exits with a non-zero
            status.
    """
    students = getUniqueStudents(studentFile)
    # Running pair counter; only pairs of unequal students are counted.
    i = -1
    for targetStudent in students:
        for sourceStudent in students:
            if targetStudent == sourceStudent:
                continue
            i += 1
            if (jobInd is not None) and (i != jobInd):
                continue
            # Single-argument print: same output whether ``print`` is the
            # Python 2 statement or the Python 3 function.
            print('%s %s' % (targetStudent, sourceStudent))
            cmd = [
                'bin/%s/boostIndependent' % getArch(),
                targetStudent,
                sourceStudent,
                targetDir,
                sourceDir,
                str(numTargetInstances),
            ]
            descFile = os.path.join(
                destDir, 'boostIndependent', targetStudent + '-' + sourceStudent + '.desc'
            )
            # Close the output file as soon as the binary has finished.
            with open(descFile, 'w') as descOut:
                subprocess.check_call(cmd, stdout=descOut)
def main(targetDir, sourceDir, destDir, prefix, studentInd):
    """Run the ``boostTest`` binary per student and extract the resulting tree.

    Reads the module-level names ``studentFile`` and ``sourceData`` (neither
    is defined in this function). The output prefix is extended with
    ``-using<sourceData>Source``, or ``-usingAllSource`` when ``sourceData``
    is not greater than zero. For each selected student the binary's stdout
    is written to ``<destDir>/desc/<prefix>-<student>.desc``, which is then
    passed to ``extractTree`` together with
    ``<destDir>/weighted/<prefix>-<student>.weka``.

    Args:
        targetDir: Passed to the binary.
        sourceDir: Passed to the binary.
        destDir: Directory holding the ``desc`` and ``weighted`` subfolders.
        prefix: Base filename prefix of the generated files.
        studentInd: Index of the only student to process, or ``None`` to
            process all of them.

    Raises:
        subprocess.CalledProcessError: If the binary exits with a non-zero
            status.
    """
    students = getUniqueStudents(studentFile)
    prefix = prefix + '-using%sSource' % (sourceData if sourceData > 0 else 'All')
    for i, student in enumerate(students):
        if (studentInd is not None) and (i != studentInd):
            continue
        print('-------------------')
        print(student)
        print('-------------------')
        cmd = [
            'bin/%s/boostTest' % getArch(),
            student,
            studentFile,
            targetDir,
            sourceDir,
            str(sourceData),
        ]
        descFile = os.path.join(destDir, 'desc', prefix + '-' + student + '.desc')
        resultFile = os.path.join(destDir, 'weighted', prefix + '-' + student + '.weka')
        # Close the output file deterministically before extractTree is
        # given its path.
        with open(descFile, 'w') as descOut:
            subprocess.check_call(cmd, stdout=descOut)
        extractTree(descFile, resultFile)
def main(targetDir, sourceDir, destDir, prefix, maxNumBoosts, numTargetInstances, studentInd):
    """Run the ``boostGivenOrder`` binary per student and extract the resulting tree.

    Reads the module-level name ``studentFile`` (not defined in this
    function). For each selected student the binary is given
    ``<destDir>/desc/combined-<student>.desc`` as its order file; its stdout
    is written to ``<destDir>/desc/<prefix>-<student>.desc``, which is then
    passed to ``extractTree`` together with
    ``<destDir>/weighted/<prefix>-<student>.weka``.

    Args:
        targetDir: Passed to the binary.
        sourceDir: Passed to the binary.
        destDir: Directory holding the ``desc`` and ``weighted`` subfolders.
        prefix: Filename prefix of the generated files.
        maxNumBoosts: Passed to the binary as a string.
        numTargetInstances: Passed to the binary as a string.
        studentInd: Index of the only student to process, or ``None`` to
            process all of them.

    Raises:
        subprocess.CalledProcessError: If the binary exits with a non-zero
            status.
    """
    students = getUniqueStudents(studentFile)
    for i, student in enumerate(students):
        if (studentInd is not None) and (i != studentInd):
            continue
        print('-------------------')
        print(student)
        print('-------------------')
        orderFile = os.path.join(destDir, 'desc', 'combined-%s.desc' % student)
        cmd = [
            'bin/%s/boostGivenOrder' % getArch(),
            student,
            orderFile,
            targetDir,
            sourceDir,
            str(maxNumBoosts),
            str(numTargetInstances),
        ]
        descFile = os.path.join(destDir, 'desc', prefix + '-' + student + '.desc')
        resultFile = os.path.join(destDir, 'weighted', prefix + '-' + student + '.weka')
        # Close the output file deterministically before extractTree is
        # given its path.
        with open(descFile, 'w') as descOut:
            subprocess.check_call(cmd, stdout=descOut)
        extractTree(descFile, resultFile)
def buildDT(dataFile, outFile, options, randomTree):
    """Run the ``buildDT`` binary on ``dataFile`` and save its stdout to ``outFile``.

    Args:
        dataFile: Path passed to the binary as its first argument.
        outFile: Path of the file that receives the binary's stdout.
        options: List of extra command-line arguments appended after
            ``dataFile``.
        randomTree: Must be falsy; random trees are not supported here.

    Raises:
        ValueError: If ``randomTree`` is truthy.
        subprocess.CalledProcessError: If the binary exits with a non-zero
            status.
    """
    if randomTree:
        raise ValueError('cannot handle randomTrees here')
    cmd = [os.path.join('bin', str(getArch()), 'buildDT'), dataFile] + options
    # Close the output file as soon as the binary has finished.
    with open(outFile, 'w') as out:
        subprocess.check_call(cmd, stdout=out)
def weightTree(inFile, dataFile, outFile):
    """Run the ``addWeights`` binary and save its stdout to ``outFile``.

    Args:
        inFile: Path passed to the binary as its first argument.
        dataFile: Path passed to the binary as its second argument.
        outFile: Path of the file that receives the binary's stdout.

    Raises:
        subprocess.CalledProcessError: If the binary exits with a non-zero
            status.
    """
    cmd = [os.path.join('bin', str(getArch()), 'addWeights'), inFile, dataFile]
    # Close the output file as soon as the binary has finished.
    with open(outFile, 'w') as out:
        subprocess.check_call(cmd, stdout=out)
def buildDT(dataFile, outFile, options, randomTree):
    """Run the ``buildDT`` binary on ``dataFile`` and save its stdout to ``outFile``.

    Args:
        dataFile: Path passed to the binary as its first argument.
        outFile: Path of the file that receives the binary's stdout.
        options: List of extra command-line arguments appended after
            ``dataFile``.
        randomTree: Must be falsy; random trees are not supported here.

    Raises:
        ValueError: If ``randomTree`` is truthy.
        subprocess.CalledProcessError: If the binary exits with a non-zero
            status.
    """
    if randomTree:
        raise ValueError('cannot handle randomTrees here')
    cmd = [os.path.join('bin', str(getArch()), 'buildDT'), dataFile] + options
    # Close the output file as soon as the binary has finished.
    with open(outFile, 'w') as out:
        subprocess.check_call(cmd, stdout=out)
def weightTree(inFile, dataFile, outFile):
    """Run the ``addWeights`` binary and save its stdout to ``outFile``.

    Args:
        inFile: Path passed to the binary as its first argument.
        dataFile: Path passed to the binary as its second argument.
        outFile: Path of the file that receives the binary's stdout.

    Raises:
        subprocess.CalledProcessError: If the binary exits with a non-zero
            status.
    """
    cmd = [os.path.join('bin', str(getArch()), 'addWeights'), inFile, dataFile]
    # Close the output file as soon as the binary has finished.
    with open(outFile, 'w') as out:
        subprocess.check_call(cmd, stdout=out)
def main(args=sys.argv[1:]):
    """Run the ``runClassifier`` binary with ``--notrain`` on a student's data file.

    Args:
        args: Command-line arguments handed to ``parseArgs``; defaults to the
            ``sys.argv[1:]`` list captured when this function was defined.

    Raises:
        subprocess.CalledProcessError: If the binary exits with a non-zero
            status.
    """
    opts = parseArgs(args)
    # NOTE(review): the file used for testing is resolved with the TRAIN
    # constant -- confirm this is intended.
    dataPath = getFilename(opts.testBase, opts.student, TRAIN)
    binary = 'bin/%s/runClassifier' % getArch()
    subprocess.check_call(
        [binary, opts.saveConfigFilename, dataPath, str(opts.numTest), '--notrain']
    )