Пример #1
0
def mainOptions(options, convertFunction=lambda x: x):
    """Run the ``trainClassifier`` binary with settings taken from ``options``.

    A config file is produced by ``combineConfigs`` and passed to the binary
    together with ``options.saveFile``, the target data file and, when source
    data is enabled, one data file per entry of ``options.otherStudents``.
    The generated config file is removed afterwards unless ``options.debug``
    is set.

    Args:
        options: object providing the attributes read below (``saveFile``,
            ``targetBase``, ``student``, ``numSource``, ``useSource``,
            ``sourceBase``, ``otherStudents``, ``saveConfigFilename``,
            ``fracSourceData``, ``debug``, ``catchOutput``).
        convertFunction: forwarded unchanged to ``combineConfigs``.

    Returns:
        ``(output, error)`` from ``Popen.communicate()`` when
        ``options.catchOutput`` is true; ``error`` is always ``None`` because
        only stdout is piped.  ``None`` otherwise.

    Raises:
        subprocess.CalledProcessError: if the binary exits with a non-zero
            status and ``options.catchOutput`` is false.  The captured-output
            path does not inspect the exit status.
    """
    # Bound before the ``try`` so the cleanup never touches an undefined name
    # when ``combineConfigs`` itself fails.
    filename = None
    try:
        filename = combineConfigs(options.saveFile, options, convertFunction)
        targetData = getFilename(options.targetBase, options.student, TRAIN)
        if (options.numSource == 0) or not options.useSource:
            sourceData = []
        else:
            sourceData = [
                getFilename(options.sourceBase, s, TRAIN)
                for s in options.otherStudents
            ]
        # Single-argument form prints the same text on Python 2 and 3.
        print('save config will be at %s' % (options.saveConfigFilename,))
        cmd = [
            'bin/%s/trainClassifier' % getArch(), filename, options.saveFile,
            targetData
        ] + sourceData
        if options.fracSourceData is not None:
            cmd += ['--fracSourceData', str(options.fracSourceData)]
        if options.debug:
            print(' '.join(cmd))
        if options.catchOutput:
            p = subprocess.Popen(cmd, stdout=subprocess.PIPE)
            output, error = p.communicate()
        else:
            subprocess.check_call(cmd)
    finally:
        # In debug mode the generated config is kept for inspection.
        if filename is not None and not options.debug:
            try:
                os.remove(filename)
            except OSError:
                # Best-effort cleanup: a missing or undeletable temp file must
                # not mask the real outcome, but KeyboardInterrupt/SystemExit
                # are no longer swallowed here.
                pass
    if options.catchOutput:
        return output, error
Пример #2
0
def main(targetDir,sourceDir,inPrefix,outPrefix,studentInd,origNumSource):
  """Run ``boostWeights`` for each student using rescaled values from a desc file.

  For every student returned by ``getUniqueStudents`` the file
  ``<targetDir>/desc/<inPrefix>-<student>.desc`` is parsed with ``parseDesc``.
  Each key/value pair of the result is appended to the command line with the
  value multiplied by ``origNumSource / (4 * 50000.0)``.  The binary's stdout
  is written to ``<targetDir>/desc/<outPrefix>-<student>.desc`` and then handed
  to ``extractTree`` together with
  ``<targetDir>/weighted/<outPrefix>-<student>.weka``.

  Args:
    targetDir: base directory for the input/output desc files; also passed to
      the binary.
    sourceDir: passed to the binary unchanged.
    inPrefix: filename prefix of the desc files that are read.
    outPrefix: filename prefix of the desc/weka files that are produced.
    studentInd: if not ``None``, only the student at this index is processed.
    origNumSource: numerator of the scaling factor applied to parsed values.

  Raises:
    subprocess.CalledProcessError: if ``boostWeights`` exits non-zero.
  """
  inFilename = inPrefix + '-%s.desc'
  outDesc = outPrefix + '-%s.desc'
  outTree = outPrefix + '-%s.weka'
  studentFile = 'data/newStudents29.txt'

  # NOTE(review): 4 * 50000.0 is presumably the size of the source data the
  # parsed values were computed against - confirm.
  factor = origNumSource / (4 * 50000.0)
  students = getUniqueStudents(studentFile)
  for i,student in enumerate(students):
    if (studentInd is not None) and (i != studentInd):
      continue
    filename = os.path.join(targetDir,'desc',inFilename % student)
    res = parseDesc(filename)
    if res is None:
      # Same text the print-to-stderr statement produced.
      sys.stderr.write('SKIPPING %s\n' % (student,))
      continue

    cmd = ['bin/%s/boostWeights' % getArch(),student,targetDir,sourceDir]
    for k,v in res.iteritems():
      cmd += [k,str(v * factor)]
    outDescPath = os.path.join(targetDir,'desc',outDesc % student)
    outTreePath = os.path.join(targetDir,'weighted',outTree % student)
    # Close (and thereby flush) the desc file before extractTree reads it,
    # instead of relying on garbage collection of an anonymous file object.
    with open(outDescPath,'w') as descOut:
      subprocess.check_call(cmd,stdout=descOut)
    extractTree(outDescPath,outTreePath)
Пример #3
0
def mainOptions(options, convertFunction=lambda x: x):
    """Invoke ``trainClassifier`` with a config built from ``options``.

    ``combineConfigs`` produces the config file; the command line consists of
    that file, ``options.saveFile``, the target training file and (if source
    data is in use) one training file per ``options.otherStudents`` entry.
    Unless ``options.debug`` is set the config file is deleted afterwards.

    Args:
        options: settings object; the attributes read are ``saveFile``,
            ``targetBase``, ``student``, ``numSource``, ``useSource``,
            ``sourceBase``, ``otherStudents``, ``saveConfigFilename``,
            ``fracSourceData``, ``debug`` and ``catchOutput``.
        convertFunction: handed to ``combineConfigs`` as-is.

    Returns:
        The ``(output, error)`` pair of ``Popen.communicate()`` if
        ``options.catchOutput`` is true (``error`` is ``None`` since stderr is
        not piped), else ``None``.

    Raises:
        subprocess.CalledProcessError: on a non-zero exit status when
            ``options.catchOutput`` is false; the exit status is not checked
            when output is captured.
    """
    # Initialised up front so the ``finally`` clause is safe even when
    # ``combineConfigs`` raises before a filename exists.
    filename = None
    try:
        filename = combineConfigs(options.saveFile, options, convertFunction)
        targetData = getFilename(options.targetBase, options.student, TRAIN)
        if (options.numSource == 0) or not options.useSource:
            sourceData = []
        else:
            sourceData = [getFilename(options.sourceBase, s, TRAIN) for s in options.otherStudents]
        # Formatted into one string so the output is identical on Python 2 and 3.
        print("save config will be at %s" % (options.saveConfigFilename,))
        cmd = ["bin/%s/trainClassifier" % getArch(), filename, options.saveFile, targetData] + sourceData
        if options.fracSourceData is not None:
            cmd += ["--fracSourceData", str(options.fracSourceData)]
        if options.debug:
            print(" ".join(cmd))
        if options.catchOutput:
            p = subprocess.Popen(cmd, stdout=subprocess.PIPE)
            output, error = p.communicate()
        else:
            subprocess.check_call(cmd)
    finally:
        # The config file is kept when debugging.
        if filename is not None and not options.debug:
            try:
                os.remove(filename)
            except OSError:
                # Deleting the temp config is best-effort; only filesystem
                # errors are ignored so interrupts still propagate.
                pass
    if options.catchOutput:
        return output, error
Пример #4
0
def main(targetDir, sourceDir, inPrefix, outPrefix, studentInd, origNumSource):
    """Re-run ``boostWeights`` per student with values scaled from a desc file.

    For each student from ``getUniqueStudents`` the file
    ``<targetDir>/desc/<inPrefix>-<student>.desc`` is parsed by ``parseDesc``;
    every key and its value times ``origNumSource / (4 * 50000.0)`` is added
    to the command line.  Stdout of the binary goes to
    ``<targetDir>/desc/<outPrefix>-<student>.desc``, which is then passed to
    ``extractTree`` along with
    ``<targetDir>/weighted/<outPrefix>-<student>.weka``.

    Args:
        targetDir: base directory of the desc files; also given to the binary.
        sourceDir: given to the binary unchanged.
        inPrefix: filename prefix of the desc files to read.
        outPrefix: filename prefix of the files to write.
        studentInd: when not ``None``, restricts processing to the student at
            this index.
        origNumSource: numerator of the scaling factor.

    Raises:
        subprocess.CalledProcessError: if ``boostWeights`` exits non-zero.
    """
    inFilename = inPrefix + '-%s.desc'
    outDesc = outPrefix + '-%s.desc'
    outTree = outPrefix + '-%s.weka'
    studentFile = 'data/newStudents29.txt'

    # NOTE(review): the constant 4 * 50000.0 presumably reflects the amount of
    # source data used when the parsed values were produced - confirm.
    factor = origNumSource / (4 * 50000.0)
    students = getUniqueStudents(studentFile)
    for i, student in enumerate(students):
        if (studentInd is not None) and (i != studentInd):
            continue
        filename = os.path.join(targetDir, 'desc', inFilename % student)
        res = parseDesc(filename)
        if res is None:
            # Emits the same line the print-to-stderr statement did.
            sys.stderr.write('SKIPPING %s\n' % (student,))
            continue

        cmd = [
            'bin/%s/boostWeights' % getArch(), student, targetDir, sourceDir
        ]
        for k, v in res.iteritems():
            cmd += [k, str(v * factor)]
        outDescPath = os.path.join(targetDir, 'desc', outDesc % student)
        outTreePath = os.path.join(targetDir, 'weighted', outTree % student)
        # The desc file must be closed before extractTree opens it; the
        # context manager guarantees that even if the subprocess fails.
        with open(outDescPath, 'w') as descOut:
            subprocess.check_call(cmd, stdout=descOut)
        extractTree(outDescPath, outTreePath)
Пример #5
0
def main(args=None):
    """Run ``runClassifier`` in ``--notrain`` mode on a student's data file.

    Args:
        args: command-line arguments handed to ``parseArgs``.  When omitted
            (``None``) the current ``sys.argv[1:]`` is used.

    Raises:
        subprocess.CalledProcessError: if the binary exits non-zero.
    """
    # Resolved at call time: a ``sys.argv[1:]`` default would be evaluated
    # once at definition time and shared between calls.
    if args is None:
        args = sys.argv[1:]
    options = parseArgs(args)
    testFile = getFilename(options.testBase, options.student, TRAIN)
    cmd = [
        'bin/%s/runClassifier' % getArch(), options.saveConfigFilename,
        testFile,
        str(options.numTest), '--notrain'
    ]
    subprocess.check_call(cmd)
Пример #6
0
def main(targetDir,sourceDir,destDir,prefix,studentInd):
  """Run ``boostTest`` for each student and extract the resulting tree.

  ``studentFile`` and ``sourceData`` are not parameters; they are looked up in
  the enclosing module scope.  The binary's stdout is written to
  ``<destDir>/desc/<prefix>-using<N>Source-<student>.desc`` (``<N>`` is
  ``sourceData`` when it is greater than 0, otherwise ``All``) and that file
  is then passed to ``extractTree`` together with the matching
  ``<destDir>/weighted/...-<student>.weka`` path.

  Args:
    targetDir: passed to the binary unchanged.
    sourceDir: passed to the binary unchanged.
    destDir: base directory for the desc and weka output paths.
    prefix: leading part of the output file names.
    studentInd: if not ``None``, only the student at this index is processed.

  Raises:
    subprocess.CalledProcessError: if ``boostTest`` exits non-zero.
  """
  students = getUniqueStudents(studentFile)
  prefix = prefix + '-using%sSource' % (sourceData if sourceData > 0 else 'All')
  for i,student in enumerate(students):
    if (studentInd is not None) and (i != studentInd):
      continue
    # Single-argument prints behave the same on Python 2 and 3.
    print('-------------------')
    print(student)
    print('-------------------')
    cmd = ['bin/%s/boostTest' % getArch(),student,studentFile,targetDir,sourceDir,str(sourceData)]
    descFile = os.path.join(destDir,'desc',prefix + '-' + student + '.desc')
    resultFile = os.path.join(destDir,'weighted',prefix + '-' + student + '.weka')
    # Close the desc file deterministically before extractTree reads it.
    with open(descFile,'w') as descOut:
      subprocess.check_call(cmd,stdout=descOut)
    extractTree(descFile,resultFile)
def main(targetDir,sourceDir,destDir,prefix,maxNumBoosts,numTargetInstances,studentInd):
  """Run ``boostGivenOrder`` for each student and extract the resulting tree.

  ``studentFile`` is not a parameter; it is looked up in the enclosing module
  scope.  For each student the path ``<destDir>/desc/combined-<student>.desc``
  is passed to the binary as the order file.  Stdout is written to
  ``<destDir>/desc/<prefix>-<student>.desc``, which is then passed to
  ``extractTree`` with ``<destDir>/weighted/<prefix>-<student>.weka``.

  Args:
    targetDir: passed to the binary unchanged.
    sourceDir: passed to the binary unchanged.
    destDir: base directory for the order file and the output paths.
    prefix: leading part of the output file names.
    maxNumBoosts: stringified and passed to the binary.
    numTargetInstances: stringified and passed to the binary.
    studentInd: if not ``None``, only the student at this index is processed.

  Raises:
    subprocess.CalledProcessError: if ``boostGivenOrder`` exits non-zero.
  """
  students = getUniqueStudents(studentFile)
  for i,student in enumerate(students):
    if (studentInd is not None) and (i != studentInd):
      continue
    # Single-argument prints behave the same on Python 2 and 3.
    print('-------------------')
    print(student)
    print('-------------------')
    orderFile = os.path.join(destDir,'desc','combined-%s.desc' % student)
    cmd = ['bin/%s/boostGivenOrder' % getArch(),student,orderFile,targetDir,sourceDir,str(maxNumBoosts),str(numTargetInstances)]
    descFile = os.path.join(destDir,'desc',prefix + '-' + student + '.desc')
    resultFile = os.path.join(destDir,'weighted',prefix + '-' + student + '.weka')
    # Close the desc file deterministically before extractTree reads it.
    with open(descFile,'w') as descOut:
      subprocess.check_call(cmd,stdout=descOut)
    extractTree(descFile,resultFile)
Пример #8
0
def main(targetDir,sourceDir,destDir,numTargetInstances,jobInd):
  """Run ``boostIndependent`` for ordered pairs of distinct students.

  ``studentFile`` is not a parameter; it is looked up in the enclosing module
  scope.  Pairs ``(targetStudent, sourceStudent)`` with different students are
  numbered from 0 in iteration order.  Stdout of each run is written to
  ``<destDir>/boostIndependent/<target>-<source>.desc``.

  Args:
    targetDir: passed to the binary unchanged.
    sourceDir: passed to the binary unchanged.
    destDir: base directory for the output desc files.
    numTargetInstances: stringified and passed to the binary.
    jobInd: if not ``None``, only the pair with this running index is run.

  Raises:
    subprocess.CalledProcessError: if ``boostIndependent`` exits non-zero.
  """
  students = getUniqueStudents(studentFile)
  # Running index over pairs of distinct students; incremented before use.
  i = -1
  for targetStudent in students:
    for sourceStudent in students:
      if (targetStudent == sourceStudent):
        continue
      i += 1
      if (jobInd is not None) and (i != jobInd):
        continue
      # One formatted string gives the same "a b" output on Python 2 and 3.
      print('%s %s' % (targetStudent, sourceStudent))
      cmd = ['bin/%s/boostIndependent' % getArch(),targetStudent,sourceStudent,targetDir,sourceDir,str(numTargetInstances)]
      descFile = os.path.join(destDir,'boostIndependent',targetStudent + '-' + sourceStudent + '.desc')
      # Without the context manager every iteration left a file object open
      # until the garbage collector reclaimed it.
      with open(descFile,'w') as descOut:
        subprocess.check_call(cmd,stdout=descOut)
Пример #9
0
def main(targetDir, sourceDir, destDir, prefix, studentInd):
    """Invoke ``boostTest`` per student, then extract the tree it produced.

    ``studentFile`` and ``sourceData`` are free names resolved from the
    enclosing module scope, not parameters.  Output of the binary is stored
    in ``<destDir>/desc/<prefix>-using<N>Source-<student>.desc``, where
    ``<N>`` is ``sourceData`` if it is greater than 0 and ``All`` otherwise;
    ``extractTree`` is then called with that file and the corresponding
    ``<destDir>/weighted/...-<student>.weka`` path.

    Args:
        targetDir: passed to the binary unchanged.
        sourceDir: passed to the binary unchanged.
        destDir: base directory for the desc and weka output paths.
        prefix: leading part of the output file names.
        studentInd: when not ``None``, restricts processing to the student at
            this index.

    Raises:
        subprocess.CalledProcessError: if ``boostTest`` exits non-zero.
    """
    students = getUniqueStudents(studentFile)
    prefix = prefix + '-using%sSource' % (sourceData
                                          if sourceData > 0 else 'All')
    for i, student in enumerate(students):
        if (studentInd is not None) and (i != studentInd):
            continue
        # Parenthesised single-argument prints are valid on Python 2 and 3.
        print('-------------------')
        print(student)
        print('-------------------')
        cmd = [
            'bin/%s/boostTest' % getArch(), student, studentFile, targetDir,
            sourceDir,
            str(sourceData)
        ]
        descFile = os.path.join(destDir, 'desc',
                                prefix + '-' + student + '.desc')
        resultFile = os.path.join(destDir, 'weighted',
                                  prefix + '-' + student + '.weka')
        # Ensure the desc file is closed before extractTree opens it.
        with open(descFile, 'w') as descOut:
            subprocess.check_call(cmd, stdout=descOut)
        extractTree(descFile, resultFile)
Пример #10
0
def main(targetDir, sourceDir, destDir, prefix, maxNumBoosts,
         numTargetInstances, studentInd):
    """Invoke ``boostGivenOrder`` per student, then extract the tree.

    ``studentFile`` is a free name resolved from the enclosing module scope.
    The order file given to the binary is
    ``<destDir>/desc/combined-<student>.desc``.  Output of the binary is
    stored in ``<destDir>/desc/<prefix>-<student>.desc`` and then passed to
    ``extractTree`` with ``<destDir>/weighted/<prefix>-<student>.weka``.

    Args:
        targetDir: passed to the binary unchanged.
        sourceDir: passed to the binary unchanged.
        destDir: base directory for the order file and the output paths.
        prefix: leading part of the output file names.
        maxNumBoosts: stringified and passed to the binary.
        numTargetInstances: stringified and passed to the binary.
        studentInd: when not ``None``, restricts processing to the student at
            this index.

    Raises:
        subprocess.CalledProcessError: if ``boostGivenOrder`` exits non-zero.
    """
    students = getUniqueStudents(studentFile)
    for i, student in enumerate(students):
        if (studentInd is not None) and (i != studentInd):
            continue
        # Parenthesised single-argument prints are valid on Python 2 and 3.
        print('-------------------')
        print(student)
        print('-------------------')
        orderFile = os.path.join(destDir, 'desc', 'combined-%s.desc' % student)
        cmd = [
            'bin/%s/boostGivenOrder' % getArch(), student, orderFile,
            targetDir, sourceDir,
            str(maxNumBoosts),
            str(numTargetInstances)
        ]
        descFile = os.path.join(destDir, 'desc',
                                prefix + '-' + student + '.desc')
        resultFile = os.path.join(destDir, 'weighted',
                                  prefix + '-' + student + '.weka')
        # Ensure the desc file is closed before extractTree opens it.
        with open(descFile, 'w') as descOut:
            subprocess.check_call(cmd, stdout=descOut)
        extractTree(descFile, resultFile)
Пример #11
0
def buildDT(dataFile,outFile,options,randomTree):
  """Run the ``buildDT`` binary on ``dataFile`` and save its stdout.

  Args:
    dataFile: first argument passed to the binary.
    outFile: path that receives the binary's stdout (opened with mode 'w').
    options: list of extra command-line arguments; it is concatenated onto
      the command list, so it must be a list.
    randomTree: must be falsy; random trees are rejected here.

  Raises:
    ValueError: if ``randomTree`` is truthy (checked before anything runs).
    subprocess.CalledProcessError: if the binary exits non-zero.
  """
  if randomTree:
    raise ValueError('cannot handle randomTrees here')
  cmd = [os.path.join('bin',str(getArch()),'buildDT'),dataFile] + options
  # Context manager closes the output file even when the subprocess fails.
  with open(outFile,'w') as out:
    subprocess.check_call(cmd,stdout=out)
Пример #12
0
def weightTree(inFile,dataFile,outFile):
  """Run the ``addWeights`` binary and save its stdout to ``outFile``.

  Args:
    inFile: first argument passed to the binary.
    dataFile: second argument passed to the binary.
    outFile: path that receives the binary's stdout (opened with mode 'w').

  Raises:
    subprocess.CalledProcessError: if the binary exits non-zero.
  """
  cmd = [os.path.join('bin',str(getArch()),'addWeights'),inFile,dataFile]
  # Context manager closes the output file even when the subprocess fails.
  with open(outFile,'w') as out:
    subprocess.check_call(cmd,stdout=out)
Пример #13
0
def buildDT(dataFile, outFile, options, randomTree):
    """Execute the ``buildDT`` binary on ``dataFile``, capturing stdout in a file.

    Args:
        dataFile: first argument passed to the binary.
        outFile: path the binary's stdout is written to (opened with 'w').
        options: list of additional command-line arguments; appended with
            ``+``, so it has to be a list.
        randomTree: must be falsy; random trees are not supported here.

    Raises:
        ValueError: if ``randomTree`` is truthy (raised before any work).
        subprocess.CalledProcessError: if the binary exits non-zero.
    """
    if randomTree:
        raise ValueError('cannot handle randomTrees here')
    cmd = [os.path.join('bin', str(getArch()), 'buildDT'), dataFile] + options
    # Closing via ``with`` avoids leaking the handle if check_call raises.
    with open(outFile, 'w') as out:
        subprocess.check_call(cmd, stdout=out)
Пример #14
0
def weightTree(inFile, dataFile, outFile):
    """Execute the ``addWeights`` binary, capturing its stdout in ``outFile``.

    Args:
        inFile: first argument passed to the binary.
        dataFile: second argument passed to the binary.
        outFile: path the binary's stdout is written to (opened with 'w').

    Raises:
        subprocess.CalledProcessError: if the binary exits non-zero.
    """
    cmd = [os.path.join('bin', str(getArch()), 'addWeights'), inFile, dataFile]
    # Closing via ``with`` avoids leaking the handle if check_call raises.
    with open(outFile, 'w') as out:
        subprocess.check_call(cmd, stdout=out)
Пример #15
0
def main(args = None):
  """Run ``runClassifier`` with ``--notrain`` on the selected student's data.

  Args:
    args: command-line arguments handed to ``parseArgs``; when omitted
      (``None``) the current ``sys.argv[1:]`` is used.

  Raises:
    subprocess.CalledProcessError: if the binary exits non-zero.
  """
  # A ``sys.argv[1:]`` default would be evaluated once, when the function is
  # defined; reading it here picks up the arguments in effect at call time.
  if args is None:
    args = sys.argv[1:]
  options = parseArgs(args)
  testFile = getFilename(options.testBase,options.student,TRAIN)
  cmd = ['bin/%s/runClassifier' % getArch(),options.saveConfigFilename,testFile,str(options.numTest),'--notrain']
  subprocess.check_call(cmd)