示例#1
0
    def notice(self, message, type=None):
        """Report ``message`` unless this object is in silent mode.

        Args:
            message: Text to report.
            type: Optional notice category. When given, the message is
                forwarded to ``tb.notice(message, type)``; when omitted the
                message is written to stdout followed by an extra newline.
        """
        if self._silent:
            return

        if type is None:
            # Call form of print: with a single parenthesised argument it
            # behaves the same on Python 2 and is valid syntax on Python 3.
            print('%s\n' % message)
        else:
            tb.notice(message, type)
示例#2
0
    def notice(self, message, type=None):
        """Print or forward a notice; does nothing when ``self._silent`` is set.

        Args:
            message: Text of the notice.
            type: Notice category passed on to ``tb.notice``. If ``None`` the
                message is printed to stdout with one additional trailing
                newline instead.
        """
        if self._silent:
            return

        if type is not None:
            tb.notice(message, type)
            return

        # print(...) with one argument is equivalent to the Python 2 print
        # statement and also parses on Python 3.
        print('%s\n' % message)
示例#3
0
文件: PyFlow.py 项目: haeusser/caffe
    def clean(self):
        """Clean all bound entries and report how many reported success.

        For every method in ``self._methods`` and every entry returned by
        ``bents()`` of each dataset in ``self._datasets``, the entry is bound
        to the method and ``clean()`` is called on the result. Truthy return
        values are counted and the total is reported through ``tb.notice``.
        """
        cleanedCount = sum(
            1
            for method in self._methods
            for dataset in self._datasets
            for entry in dataset.bents()
            if entry.bind(method).clean()
        )

        tb.notice("cleaned %d entries" % cleanedCount, "passed")
示例#4
0
文件: PyFlow.py 项目: haeusser/caffe
    def clean(self):
        """Run ``clean()`` on each (method, entry) binding and report the count.

        Iterates methods, then datasets, then the dataset's ``bents()``
        entries; each truthy ``clean()`` result adds one to the number that
        is finally passed to ``tb.notice``.
        """
        cleaned = 0
        for method in self._methods:
            for dataset in self._datasets:
                for entry in dataset.bents():
                    cleaned += 1 if entry.bind(method).clean() else 0

        summary = 'cleaned %d entries' % cleaned
        tb.notice(summary, 'passed')
示例#5
0
    def archive(self, src, target, iter_step=-1):
        """Shrink the environment at ``src`` and move it below ``target``.

        Args:
            src: Path handed to ``Environment`` and subsequently moved.
            target: Directory that receives the result; the destination is
                ``target/<basename of src>``.
            iter_step: Forwarded to ``Environment.shrink``.

        Raises:
            Exception: If the destination path already exists.
        """
        import shutil  # local import: only needed by this method

        basename = os.path.basename(src)
        targetPath = os.path.join(target, basename)

        if os.path.exists(targetPath):
            raise Exception("target path %s already exists" % targetPath)

        env = Environment(src, backend=self._backend, unattended=self._unattended, silent=self._silent)
        env.init()
        env.shrink(iter_step=iter_step)
        tb.notice('archiving %s to %s' % (src, targetPath), 'run')
        # shutil.move instead of an interpolated shell 'mv' string: paths
        # containing spaces or shell metacharacters are handled correctly and
        # a failed move raises instead of being silently ignored.
        shutil.move(src, targetPath)
示例#6
0
文件: PyFlow.py 项目: haeusser/caffe
    def check(self):
        """Run ``checkOut`` on every bound entry and report the pass ratio.

        Entries come from ``uents()`` when the method's ``direction()`` is the
        empty string and from ``bents()`` otherwise. The summary notice is
        tagged "passed" only if every check returned a truthy value.
        """
        checked = 0
        failed = 0
        for method in self._methods:
            for dataset in self._datasets:
                if method.direction() == "":
                    entries = dataset.uents()
                else:
                    entries = dataset.bents()
                for entry in entries:
                    if not entry.bind(method).checkOut(self._args.verbose):
                        failed += 1
                    checked += 1

        status = "passed" if failed == 0 else "failed"
        tb.notice("(%d/%d) passed" % (checked - failed, checked), status)
示例#7
0
文件: PyFlow.py 项目: haeusser/caffe
    def update(self):
        """Create one update job per bound entry and submit them all.

        For each method and dataset a ``tb.Job`` is created per entry
        (``uents()`` when the method's ``direction()`` is empty, ``bents()``
        otherwise), filled by ``makeUpdateJob`` and posted to a ``tb.Queue``.
        ``finishPacket()`` is called once per method, and the queue is
        submitted with the ``local`` and ``cores`` settings from
        ``self._args``.
        """
        jobQueue = tb.Queue()

        for method in self._methods:
            for dataset in self._datasets:
                tb.notice('creating jobs for <%s> on <%s>' % (method, dataset))
                if method.direction() == '':
                    entries = dataset.uents()
                else:
                    entries = dataset.bents()
                for entry in entries:
                    updateJob = tb.Job()
                    entry.bind(method).makeUpdateJob(updateJob)
                    jobQueue.postJob(updateJob)

            # One packet per method.
            jobQueue.finishPacket()

        jobQueue.submit(local=self._args.local, cores=self._args.cores)
示例#8
0
文件: PyFlow.py 项目: haeusser/caffe
    def check(self):
        """Check every (method, entry) binding and emit a summary notice.

        ``checkOut(self._args.verbose)`` is called on each binding; entries
        are taken from ``uents()`` if ``direction()`` of the method is ``''``
        and from ``bents()`` otherwise. The notice type is 'passed' when all
        checks succeeded and 'failed' otherwise.
        """
        outcomes = [
            entry.bind(method).checkOut(self._args.verbose)
            for method in self._methods
            for dataset in self._datasets
            for entry in (dataset.uents() if method.direction() == ''
                          else dataset.bents())
        ]

        total = len(outcomes)
        okCount = len([outcome for outcome in outcomes if outcome])

        summary = '(%d/%d) passed' % (okCount, total)
        if okCount != total:
            tb.notice(summary, 'failed')
        else:
            tb.notice(summary, 'passed')
示例#9
0
文件: PyFlow.py 项目: haeusser/caffe
    def update(self):
        """Post an update job for each bound entry, then submit the queue.

        Jobs are grouped into one packet per method (``finishPacket()`` runs
        after all datasets of a method were processed). Submission uses
        ``self._args.local`` and ``self._args.cores``.
        """
        pending = tb.Queue()

        for method in self._methods:
            for dataset in self._datasets:
                tb.notice("creating jobs for <%s> on <%s>" % (method, dataset))
                # Empty direction selects uents(), anything else bents().
                unidirectional = method.direction() == ""
                listEntries = dataset.uents if unidirectional else dataset.bents
                for entry in listEntries():
                    newJob = tb.Job()
                    bound = entry.bind(method)
                    bound.makeUpdateJob(newJob)
                    pending.postJob(newJob)

            pending.finishPacket()

        pending.submit(local=self._args.local, cores=self._args.cores)
示例#10
0
    def archive(self, src, target, iter_step=-1):
        """Shrink the environment at ``src`` and move it into ``target``.

        Args:
            src: Path passed to ``Environment`` and moved afterwards.
            target: Destination directory; the environment ends up at
                ``target/<basename of src>``.
            iter_step: Passed through to ``Environment.shrink``.

        Raises:
            Exception: If ``target/<basename of src>`` already exists.
        """
        import shutil  # local import: only needed by this method

        basename = os.path.basename(src)
        targetPath = os.path.join(target, basename)

        if os.path.exists(targetPath):
            raise Exception("target path %s already exists" % targetPath)

        env = Environment(src,
                          backend=self._backend,
                          unattended=self._unattended,
                          silent=self._silent)
        env.init()
        env.shrink(iter_step=iter_step)
        tb.notice('archiving %s to %s' % (src, targetPath), 'run')
        # Use shutil.move rather than building a shell 'mv' command: no shell
        # word-splitting of the paths, and failures raise instead of being
        # dropped with the ignored os.system() return code.
        shutil.move(src, targetPath)
示例#11
0
    def prototxt(self, inFile, outDir, defs=None):
        """Produce a .prototxt for ``inFile`` and return its path.

        Behaviour depends on the extension of ``inFile``:

        * ``.prototxt``: copied to ``outDir`` with ``cp``; the *input* path is
          returned.
        * ``.prototmp``: preprocessed with ``tb.preprocessFile`` into
          ``outDir/<stem>.prototxt``.
        * ``.py``: executed with ``python -B`` with the definitions passed as
          ``key=value`` arguments; stdout is redirected to
          ``outDir/<stem>.prototxt``.

        Args:
            inFile: Path of the source file; must be an existing file.
            outDir: Directory receiving the generated/copied file.
            defs: Optional mapping of definitions. It is copied, and the copy
                gets a ``'name'`` entry set to ``self._name``.

        Returns:
            Path of the resulting prototxt (see above).

        Raises:
            Exception: If ``inFile`` is not a file, has an unsupported
                extension, or the ``.py`` conversion exits non-zero.
        """
        # Work on a copy: the former ``defs={}`` default was shared between
        # calls and 'name' was written into it (and into the caller's dict).
        defs = {} if defs is None else defs.copy()
        defs['name'] = self._name

        if not os.path.isfile(inFile):
            raise Exception('input file %s not file' % inFile)

        baseName = os.path.basename(inFile)

        if inFile.endswith('.prototxt'):
            os.system('cp %s %s' % (inFile, outDir))
            return '%s' % inFile

        if inFile.endswith('.prototmp'):
            # Strip only the trailing extension; str.replace would also drop
            # occurrences in the middle of the file name.
            prototxt = '%s/%s.prototxt' % (outDir, baseName[:-len('.prototmp')])
            if not self._silent:
                tb.notice('preprocessing %s' % inFile, 'run')
            tb.preprocessFile(inFile, prototxt, defs)
            return prototxt

        if inFile.endswith('.py'):
            prototxt = '%s/%s.prototxt' % (outDir, baseName[:-len('.py')])
            # items() works on Python 2 and 3 (iteritems() is Python 2 only).
            args = ' '.join('%s=%s' % (k, v) for k, v in defs.items())

            if not self._silent:
                # defs always holds 'name', so the argument list is never empty.
                tb.notice('converting %s (%s)' % (inFile, args), 'run')
            if os.system('python -B %s %s > %s' % (inFile, args, prototxt)) != 0:
                raise Exception('conversion of %s failed' % inFile)
            return prototxt

        raise Exception('don\'t know how to convert file %s to prototxt' % inFile)
示例#12
0
    def prototxt(self, inFile, outDir, defs=None):
        """Convert or copy ``inFile`` to a prototxt in ``outDir``.

        * ``*.prototxt`` files are copied with ``cp`` and the input path is
          returned unchanged.
        * ``*.prototmp`` files are run through ``tb.preprocessFile``.
        * ``*.py`` files are executed via ``python -B`` with ``key=value``
          arguments, stdout being redirected into the output file.

        Args:
            inFile: Existing source file.
            outDir: Output directory.
            defs: Optional definitions mapping; a copy is extended with
                ``'name': self._name`` and used for the conversion.

        Returns:
            The input path for ``.prototxt`` inputs, otherwise
            ``outDir/<stem>.prototxt``.

        Raises:
            Exception: For a missing input file, an unknown extension, or a
                non-zero exit status of the ``.py`` conversion.
        """
        # A copy avoids mutating the caller's dict and replaces the shared
        # mutable ``{}`` default that the signature used to have.
        defs = {} if defs is None else defs.copy()
        defs['name'] = self._name

        if not os.path.isfile(inFile):
            raise Exception('input file %s not file' % inFile)

        if inFile.endswith('.prototxt'):
            os.system('cp %s %s' % (inFile, outDir))
            return '%s' % inFile
        elif inFile.endswith('.prototmp'):
            # Cut the extension off the end only (replace() removed every
            # occurrence of the substring in the name).
            stem = os.path.basename(inFile)[:-len('.prototmp')]
            prototxt = '%s/%s.prototxt' % (outDir, stem)
            if not self._silent:
                tb.notice('preprocessing %s' % inFile, 'run')
            tb.preprocessFile(inFile, prototxt, defs)
            return prototxt
        elif inFile.endswith('.py'):
            stem = os.path.basename(inFile)[:-len('.py')]
            prototxt = '%s/%s.prototxt' % (outDir, stem)
            # items() is available on both Python 2 and Python 3 dicts.
            args = ' '.join('%s=%s' % pair for pair in defs.items())

            if not self._silent:
                # 'name' is always present, so args is never empty here.
                tb.notice('converting %s (%s)' % (inFile, args), 'run')
            status = os.system('python -B %s %s > %s' %
                               (inFile, args, prototxt))
            if status != 0:
                raise Exception('conversion of %s failed' % inFile)
            return prototxt
        else:
            raise Exception('don\'t know how to convert file %s to prototxt' %
                            inFile)
示例#13
0
    def shrink(self, iter_step):
        """Delete temporary data and all but selected training snapshots.

        Removes ``*.pyc`` files and the ``scratch`` directory below
        ``self._path``, removes ``self._jobDir`` if ``haveJobDir()`` is true,
        and calls ``sweep()``. If ``haveTrainDir()`` is true, every state file
        is then either kept (and announced) or deleted together with the
        model file of the same iteration.

        Args:
            iter_step: State files whose iteration is a multiple of this
                value are kept; ``-1`` disables this rule. The last entry of
                ``self._stateFiles`` is always kept.
        """
        cleanupSteps = (
            ('removing *.pyc', 'rm -f %s/*.pyc' % (self._path)),
            ('removing scratch', 'rm -rf %s/scratch' % self._path),
        )
        for label, command in cleanupSteps:
            self.notice(label, 'del')
            os.system(command)

        if self.haveJobDir():
            self.notice('removing jobs', 'del')
            os.system('rm -rf %s' % self._jobDir)

        self.sweep()

        if not self.haveTrainDir():
            return

        for stateFile in self._stateFiles:
            onStep = iter_step != -1 and stateFile.iteration() % iter_step == 0
            isLast = stateFile == self._stateFiles[-1]

            # The last model file with a matching iteration wins.
            matching = [
                candidate for candidate in self._modelFiles
                if candidate.iteration() == stateFile.iteration()
            ]
            modelFile = matching[-1] if matching else None

            if onStep or isLast:
                if modelFile is None:
                    tb.notice('keeping file %s' % stateFile, 'passed')
                else:
                    tb.notice(
                        'keeping file %s' %
                        (os.path.basename(stateFile.filename())), 'passed')
                    tb.notice(
                        'keeping file %s' %
                        (os.path.basename(modelFile.filename())), 'passed')
            else:
                if modelFile is not None:
                    modelFile.delete(True)
                stateFile.delete(True)
示例#14
0
    def _callCopiedBin(self, cmd):
        """Run caffe from copies of the binary and libcaffe.so in the cwd.

        The binary returned by ``caffeBin()`` and the ``libcaffe.so`` it links
        against (as reported by ``ldd``) are copied into the current
        directory. ``cmd`` is then executed with the copied binary, with
        ``.`` prepended to ``LD_LIBRARY_PATH``.

        Args:
            cmd: Argument string appended after the binary.

        Raises:
            Exception: If the ``ldd`` output has no ``libcaffe.so`` line.
        """
        binPath = caffeBin()
        localBin = './' + os.path.basename(binPath)
        tb.notice('making a local copy of %s' % binPath)
        os.system('cp %s .' % binPath)

        ldd = tb.run('ldd %s' % binPath)
        # Raw string: '\.' is not a valid escape in a normal string literal.
        # The dot of "libcaffe.so" is escaped so it only matches a literal dot.
        libPattern = re.compile(r'\s*libcaffe\.so => (.*\.so)')
        caffeLib = None
        for line in ldd.split('\n'):
            match = libPattern.match(line)
            if match:
                caffeLib = match.group(1)
                break
        if caffeLib is None:
            raise Exception('cannot find libcaffe.so dependency')

        tb.notice('making a local copy of %s' % caffeLib)
        os.system('cp %s .' % caffeLib)

        # %d renders the boolean as 1 (not quiet) or 0 (quiet).
        cmd = 'GLOG_logtostderr=%d LD_LIBRARY_PATH=.:$LD_LIBRARY_PATH %s %s' % (not self._quiet, localBin, cmd)
        if not self._silent:
            tb.notice('running "%s"' % cmd, 'run')
        tb.system(cmd)
示例#15
0
    def shrink(self, iter_step):
        """Remove byproducts and prune training snapshots.

        Steps, in order: delete ``*.pyc`` and ``scratch`` under
        ``self._path``; delete ``self._jobDir`` when ``haveJobDir()`` holds;
        call ``sweep()``; and, when ``haveTrainDir()`` holds, walk
        ``self._stateFiles`` keeping or deleting each one along with the model
        file that has the same iteration.

        Args:
            iter_step: Keep state files whose iteration is divisible by this
                value (ignored when ``-1``). The final state file is kept
                regardless.
        """
        root = self._path

        self.notice('removing *.pyc', 'del')
        os.system('rm -f %s/*.pyc' % (root))

        self.notice('removing scratch', 'del')
        os.system('rm -rf %s/scratch' % root)

        if self.haveJobDir():
            self.notice('removing jobs', 'del')
            os.system('rm -rf %s' % self._jobDir)

        self.sweep()

        if self.haveTrainDir():
            for snapshot in self._stateFiles:
                keep = False
                if iter_step != -1 and snapshot.iteration() % iter_step == 0:
                    keep = True
                if snapshot == self._stateFiles[-1]:
                    keep = True

                # Scan all model files; a later match replaces an earlier one.
                companion = None
                for candidate in self._modelFiles:
                    if candidate.iteration() == snapshot.iteration():
                        companion = candidate

                if not keep:
                    if companion is not None:
                        companion.delete(True)
                    snapshot.delete(True)
                    continue

                if companion is None:
                    tb.notice('keeping file %s' % snapshot, 'passed')
                    continue

                for kept in (snapshot, companion):
                    tb.notice('keeping file %s' % (os.path.basename(kept.filename())), 'passed')
示例#16
0
    def _callCopiedBin(self, cmd):
        """Execute ``cmd`` with a local copy of the caffe binary and library.

        Copies the binary given by ``caffeBin()`` and the ``libcaffe.so``
        found in its ``ldd`` output into the current directory, then runs the
        copied binary with ``LD_LIBRARY_PATH`` starting at ``.``.

        Args:
            cmd: Argument string placed after the binary on the command line.

        Raises:
            Exception: If no ``libcaffe.so`` dependency appears in the
                ``ldd`` output.
        """
        binPath = caffeBin()
        localBin = './' + os.path.basename(binPath)
        tb.notice('making a local copy of %s' % binPath)
        os.system('cp %s .' % binPath)

        lddOutput = tb.run('ldd %s' % binPath)
        caffeLib = None
        for line in lddOutput.split('\n'):
            # Raw string avoids the invalid '\.' escape of the old literal;
            # the dot in "libcaffe.so" is now matched literally.
            match = re.match(r'\s*libcaffe\.so => (.*\.so)', line)
            if match:
                caffeLib = match.group(1)
                break
        if caffeLib is None:
            raise Exception('cannot find libcaffe.so dependency')

        tb.notice('making a local copy of %s' % caffeLib)
        os.system('cp %s .' % caffeLib)

        # The boolean is formatted with %d, i.e. as 1 or 0.
        cmd = 'GLOG_logtostderr=%d LD_LIBRARY_PATH=.:$LD_LIBRARY_PATH %s %s' % (
            not self._quiet, localBin, cmd)
        if not self._silent:
            tb.notice('running "%s"' % cmd, 'run')
        tb.system(cmd)
示例#17
0
 def _callBin(self, cmd):
     """Run the binary from ``caffeBin()`` with ``cmd`` as its arguments.

     ``GLOG_logtostderr`` is set to 1 unless ``self._quiet`` is true; the
     full command line is announced unless ``self._silent`` is true.
     """
     logToStderr = not self._quiet
     commandLine = 'GLOG_logtostderr=%d %s %s' % (logToStderr, caffeBin(), cmd)
     if not self._silent:
         tb.notice('running "%s"' % commandLine, 'run')
     tb.system(commandLine)
示例#18
0
 def _callBin(self, cmd):
     """Invoke the caffe binary with the given argument string.

     The command is prefixed with ``GLOG_logtostderr=<0|1>`` (1 when not
     quiet) and reported through ``tb.notice`` unless silent.
     """
     parts = (not self._quiet, caffeBin(), cmd)
     fullCommand = 'GLOG_logtostderr=%d %s %s' % parts
     announce = not self._silent
     if announce:
         tb.notice('running "%s"' % fullCommand, 'run')
     tb.system(fullCommand)
示例#19
0
文件: Files.py 项目: haeusser/caffe
 def delete(self,verbose=False):
     """Remove the file at ``self._filename``.

     Args:
         verbose: If true, announce the removal via ``tb.notice`` first.
     """
     target = self._filename
     if verbose:
         tb.notice('removing %s' % target, 'del')
     os.remove(target)
示例#20
0
文件: PyCNN.py 项目: haeusser/caffe
def runOnCluster(env, node, gpus, background, insertLocal=True, trackJob=True):
    """Write a job script for the current command line and submit it via qsub.

    The command line of this process (``sys.argv``) is wrapped in a bash
    script inside the environment's job directory and submitted through
    ``ssh lmbtorque``. The function never returns: it ends with
    ``sys.exit(0)``.

    Args:
        env: Environment object; ``params()``, ``makeJobDir()``, ``jobDir()``,
            ``name()`` and ``path()`` are used.
        node: Node specification for ``-l nodes=``; ``None`` requests "1".
        gpus: Number of GPUs to request.
        background: If true, submit non-interactively with joined output
            written to the job directory; otherwise submit with ``-I -x``.
        insertLocal: If true, ``--execute`` is inserted as the first argument
            of the forwarded command line.
        trackJob: If true, refuse to run while ``<jobDir>/current_id`` exists
            and let the job script record its id in ``jobs/current_id``.

    Raises:
        Exception: If ``trackJob`` is set and ``current_id`` already exists.
    """
    gpuArch = env.params().gpuArch()
    if node is not None:
        tb.notice(
            "Forwarding job to cluster node %s with %d gpu(s) which are of type %s" % (node, gpus, gpuArch), "info"
        )
    else:
        tb.notice("Forwarding job to cluster with %d gpu(s) which are of type %s" % (gpus, gpuArch), "info")

    env.makeJobDir()

    currentId = "%s/current_id" % env.jobDir()
    if trackJob and os.path.exists(currentId):
        raise Exception("%s exists, there seems to be a job already running" % currentId)

    # Copy the argument list so inserting "--execute" does not modify
    # sys.argv itself.
    sysargs = list(sys.argv)
    if insertLocal:
        sysargs.insert(1, "--execute")
    cmd = " ".join(sysargs)

    if args.backend == "python":
        training = os.path.abspath("training")
        cmd = "LD_LIBRARY_PATH=%s:$LD_LIBRARY_PATH PYTHONPATH=%s:$PYTHONPATH %s" % (training, training, cmd)

    qsubCommandFile = "%s/%s-%s.sh" % (env.jobDir(), env.name().replace("/", "_"), time.strftime("%d.%m.%Y-%H:%M:%S"))

    # The epilogue previously contained a literal, never-substituted "$path";
    # fill it in the same way as in the job script below.
    epilogueScript = "%s/epilogue.sh" % env.jobDir()
    epilogue = Template(
        "#!/bin/bash\n"
        'cd "$path"\n'
        "rm -f jobs/current_id\n"
    ).substitute(path=env.path())
    with open(epilogueScript, "w") as epilogueFile:
        epilogueFile.write(epilogue)

    if trackJob:
        # Substitution values are inserted verbatim by Template, so a single
        # "$" is required here ("$$" would reach bash as the shell's PID).
        saveIdCommand = "echo $PBS_JOBID > jobs/current_id"
    else:
        saveIdCommand = ""

    script = Template(
        "#!/bin/bash\n"
        "\n"
        "umask 0002\n"
        'echo -e "\\e[30;42m --- running on" `hostname` "--- \\e[0m"\n'
        'cd "$path"\n'
        "$saveIdCommand\n"
        'trap "echo got SIGHUP" SIGHUP\n'
        'trap "echo got SIGUSR1" USR1\n'
        "$command\n"
        "echo done\n"
        "rm -f jobs/current_id\n"
    ).substitute(path=env.path(), command=cmd, saveIdCommand=saveIdCommand)

    with open(qsubCommandFile, "w") as scriptFile:
        scriptFile.write(script)
    tb.system('chmod a+x "%s"' % qsubCommandFile)

    qsub = "qsub -l nodes=%s:gpus=%d%s,mem=%dmb,walltime=240:00:00 %s -q gpujob -d %s %s -N %s -T %s" % (
        node if node is not None else "1",
        gpus,
        (":" + gpuArch) if gpuArch != "any" else "",
        env.params().requiredMemory(),
        "-I -x" if not background else "",
        env.path(),
        qsubCommandFile,
        env.name(),
        epilogueScript,
    )

    if background:
        # Call form of print: same output on Python 2, valid on Python 3.
        print("job name: %s" % os.path.basename(qsubCommandFile))
        qsub += " -j oe -o %s" % (env.jobDir())

    tb.notice("lmbtorque: running %s" % qsub, "run")

    if not background:
        tb.system('ssh lmbtorque "umask 0002; cd %s; %s;  rm -f jobs/current_id"' % (env.path(), qsub))
    else:
        tb.system('ssh lmbtorque "umask 0002; %s"' % (qsub))
    sys.exit(0)
示例#21
0
def runOnCluster(env, node, gpus, background, insertLocal=True, trackJob=True):
    """Forward the current command line to the cluster as a qsub job.

    A bash job script containing the command line of this process is written
    to the environment's job directory and submitted with ``qsub`` through
    ``ssh lmbtorque``. The function terminates the process with
    ``sys.exit(0)`` after submitting.

    Args:
        env: Environment object; its ``params()``, ``makeJobDir()``,
            ``jobDir()``, ``name()`` and ``path()`` are used.
        node: Value for ``-l nodes=``; ``None`` is replaced by '1'.
        gpus: Number of GPUs requested.
        background: If true the job is submitted non-interactively and its
            joined output goes to the job directory; otherwise ``-I -x`` is
            passed to qsub.
        insertLocal: If true, '--execute' is inserted as first argument of
            the forwarded command line.
        trackJob: If true, abort when ``<jobDir>/current_id`` exists and have
            the job script write its id to ``jobs/current_id``.

    Raises:
        Exception: If ``trackJob`` is set and ``current_id`` already exists.
    """
    gpuArch = env.params().gpuArch()
    if node is not None:
        tb.notice(
            'Forwarding job to cluster node %s with %d gpu(s) which are of type %s'
            % (node, gpus, gpuArch), 'info')
    else:
        tb.notice(
            'Forwarding job to cluster with %d gpu(s) which are of type %s' %
            (gpus, gpuArch), 'info')

    env.makeJobDir()

    currentId = '%s/current_id' % env.jobDir()
    if trackJob and os.path.exists(currentId):
        raise Exception('%s exists, there seems to be a job already running' %
                        currentId)

    # Work on a copy: inserting into sys.argv directly would change the
    # process-wide argument list.
    sysargs = list(sys.argv)
    if insertLocal:
        sysargs.insert(1, '--execute')
    cmd = ' '.join(sysargs)

    if args.backend == 'python':
        training = os.path.abspath('training')
        cmd = 'LD_LIBRARY_PATH=%s:$LD_LIBRARY_PATH PYTHONPATH=%s:$PYTHONPATH %s' % (
            training, training, cmd)

    qsubCommandFile = '%s/%s-%s.sh' % (env.jobDir(), env.name().replace(
        '/', '_'), time.strftime('%d.%m.%Y-%H:%M:%S'))

    # "$path" used to be written literally into the epilogue (it was never
    # substituted); substitute it like in the job script below.
    epilogueScript = '%s/epilogue.sh' % env.jobDir()
    epilogue = Template('#!/bin/bash\n'
                        'cd "$path"\n'
                        'rm -f jobs/current_id\n').substitute(path=env.path())
    with open(epilogueScript, 'w') as epilogueFile:
        epilogueFile.write(epilogue)

    # Template inserts substitution values verbatim, so the shell variable
    # needs a single '$' ('$$' would be expanded to the PID by bash).
    saveIdCommand = 'echo $PBS_JOBID > jobs/current_id' if trackJob else ''

    script = Template(
        '#!/bin/bash\n'
        '\n'
        'umask 0002\n'
        'echo -e "\\e[30;42m --- running on" `hostname` "--- \\e[0m"\n'
        'cd "$path"\n'
        '$saveIdCommand\n'
        'trap "echo got SIGHUP" SIGHUP\n'
        'trap "echo got SIGUSR1" USR1\n'
        '$command\n'
        'echo done\n'
        'rm -f jobs/current_id\n').substitute(path=env.path(),
                                              command=cmd,
                                              saveIdCommand=saveIdCommand)

    with open(qsubCommandFile, 'w') as scriptFile:
        scriptFile.write(script)
    tb.system('chmod a+x "%s"' % qsubCommandFile)

    qsub = 'qsub -l nodes=%s:gpus=%d%s,mem=%dmb,walltime=240:00:00 %s -q gpujob -d %s %s -N %s -T %s' % (
        node if node is not None else '1', gpus,
        (':' + gpuArch) if gpuArch != 'any' else '',
        env.params().requiredMemory(), '-I -x' if not background else '',
        env.path(), qsubCommandFile, env.name(), epilogueScript)

    if background:
        # print(...) with one argument works on Python 2 and Python 3.
        print('job name: %s' % os.path.basename(qsubCommandFile))
        qsub += ' -j oe -o %s' % (env.jobDir())

    tb.notice("lmbtorque: running %s" % qsub, 'run')

    if not background:
        tb.system(
            'ssh lmbtorque "umask 0002; cd %s; %s;  rm -f jobs/current_id"' %
            (env.path(), qsub))
    else:
        tb.system('ssh lmbtorque "umask 0002; %s"' % (qsub))
    sys.exit(0)