Exemplo n.º 1
0
    def viewFilters(self, iter):
        """Launch the weight viewer for the training net and one snapshot.

        Calls ``self.prepareTraining()``, resolves the snapshot file through
        ``self.getModelFile(iter)`` and passes ``train.prototxt`` from the
        training directory together with that snapshot to
        ``/home/ilge/bin/weight-viewer`` via ``tb.system``.

        Side effect: ``LD_LIBRARY_PATH`` and ``PATH`` in ``os.environ`` are
        replaced (not extended) by the hard-coded values below and are not
        restored afterwards.
        """
        self.prepareTraining()
        netDefinition = self._trainDir + '/train.prototxt'
        # getModelFile returns a pair; only the file name is used here.
        weightsFile, _ = self.getModelFile(iter)

        # NOTE(review): machine-specific search paths, every entry listed
        # twice -- looks like a pasted shell environment; confirm before
        # relying on this anywhere else.
        libraryPath = "/misc/lmbraid17/sceneflownet/common/programs/torch/install/lib:/usr/lib/x86_64-linux-gnu:/misc/lmbraid17/sceneflownet/common/software-root/lib:/home/ilge/dev/hackathon-caffe2/build/lib:/misc/software-lin/Qt-5.3.2/5.3/gcc_64/lib:/misc/lmbraid17/sceneflownet/common/programs/torch/install/lib:/usr/lib/x86_64-linux-gnu:/misc/lmbraid17/sceneflownet/common/software-root/lib:/home/ilge/dev/hackathon-caffe2/build/lib:/misc/software-lin/Qt-5.3.2/5.3/gcc_64/lib::/home/ilge/lib:/misc/software-lin/lmbsoft/openni-1.5.2.23-x86_64/usr/lib:/misc/software-lin/lmbsoft/glog/lib:/misc/software-lin/lmbsoft/mkl/lib:/misc/software-lin/lmbsoft/mkl/lib/intel64:/misc/software-lin/lmbsoft/cuda-6.5.14-x86_64/lib64:/misc/software-lin/lmbsoft/cuda-6.0.37-x86_64/lib64:/misc/student/mayern/OpenNI-Bin-Dev-Linux-x64-v1.5.4.0/Lib:/home/ilge/lib:/misc/software-lin/lmbsoft/openni-1.5.2.23-x86_64/usr/lib:/misc/software-lin/lmbsoft/glog/lib:/misc/software-lin/lmbsoft/mkl/lib:/misc/software-lin/lmbsoft/mkl/lib/intel64:/misc/software-lin/lmbsoft/cuda-6.5.14-x86_64/lib64:/misc/software-lin/lmbsoft/cuda-6.0.37-x86_64/lib64:/misc/student/mayern/OpenNI-Bin-Dev-Linux-x64-v1.5.4.0/Lib"
        executablePath = "/home/ilge/bin:/home/ilge/dev/pymill/bin:/misc/lmbraid17/sceneflownet/common/programs/torch/install/bin:/misc/lmbraid17/sceneflownet/common/software-root/bin:/misc/lmbraid17/sceneflownet/ilge/hackathon-caffe2/python/pymill/bin:/misc/software-lin/Qt-5.3.2/5.3/gcc_64/bin:/home/ilge/bin:/home/ilge/dev/pymill/bin:/misc/lmbraid17/sceneflownet/common/programs/torch/install/bin:/misc/lmbraid17/sceneflownet/common/software-root/bin:/misc/lmbraid17/sceneflownet/ilge/hackathon-caffe2/python/pymill/bin:/misc/software-lin/Qt-5.3.2/5.3/gcc_64/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/games:/usr/local/games:/misc/software-lin/lmbsoft/cuda-6.5.14-x86_64/bin:/misc/software-lin/matlabR2013a/bin:/home/ilge/data/caffe/matching/bin:/misc/lmbraid15/hackathon/common/flo-results/bin:/misc/lmbraid17/sceneflownet/common/data_tools:/misc/software-lin/lmbsoft/cuda-6.5.14-x86_64/bin:/misc/software-lin/matlabR2013a/bin:/home/ilge/data/caffe/matching/bin:/misc/lmbraid15/hackathon/common/flo-results/bin:/misc/lmbraid17/sceneflownet/common/data_tools"
        os.environ['LD_LIBRARY_PATH'] = libraryPath
        os.environ['PATH'] = executablePath

        viewerCommand = '/home/ilge/bin/weight-viewer %s %s' % (netDefinition, weightsFile)
        tb.system(viewerCommand)
Exemplo n.º 2
0
    def viewFilters(self, iter):
        """Show the filters of a snapshot in the external weight viewer.

        After ``self.prepareTraining()`` the snapshot file is looked up with
        ``self.getModelFile(iter)``; ``tb.system`` is then given the
        ``/home/ilge/bin/weight-viewer`` command line with the training
        prototxt and that snapshot as arguments.

        The process environment is modified permanently: ``LD_LIBRARY_PATH``
        and ``PATH`` are overwritten with the fixed values listed below.
        """
        self.prepareTraining()
        prototxtPath = self._trainDir + '/train.prototxt'
        # The second element of the returned pair is not needed.
        snapshotPath, _ = self.getModelFile(iter)

        # NOTE(review): hard-coded, host-specific values (each entry appears
        # twice); presumably captured from a developer shell -- confirm.
        environmentOverrides = (
            ('LD_LIBRARY_PATH', "/misc/lmbraid17/sceneflownet/common/programs/torch/install/lib:/usr/lib/x86_64-linux-gnu:/misc/lmbraid17/sceneflownet/common/software-root/lib:/home/ilge/dev/hackathon-caffe2/build/lib:/misc/software-lin/Qt-5.3.2/5.3/gcc_64/lib:/misc/lmbraid17/sceneflownet/common/programs/torch/install/lib:/usr/lib/x86_64-linux-gnu:/misc/lmbraid17/sceneflownet/common/software-root/lib:/home/ilge/dev/hackathon-caffe2/build/lib:/misc/software-lin/Qt-5.3.2/5.3/gcc_64/lib::/home/ilge/lib:/misc/software-lin/lmbsoft/openni-1.5.2.23-x86_64/usr/lib:/misc/software-lin/lmbsoft/glog/lib:/misc/software-lin/lmbsoft/mkl/lib:/misc/software-lin/lmbsoft/mkl/lib/intel64:/misc/software-lin/lmbsoft/cuda-6.5.14-x86_64/lib64:/misc/software-lin/lmbsoft/cuda-6.0.37-x86_64/lib64:/misc/student/mayern/OpenNI-Bin-Dev-Linux-x64-v1.5.4.0/Lib:/home/ilge/lib:/misc/software-lin/lmbsoft/openni-1.5.2.23-x86_64/usr/lib:/misc/software-lin/lmbsoft/glog/lib:/misc/software-lin/lmbsoft/mkl/lib:/misc/software-lin/lmbsoft/mkl/lib/intel64:/misc/software-lin/lmbsoft/cuda-6.5.14-x86_64/lib64:/misc/software-lin/lmbsoft/cuda-6.0.37-x86_64/lib64:/misc/student/mayern/OpenNI-Bin-Dev-Linux-x64-v1.5.4.0/Lib"),
            ('PATH', "/home/ilge/bin:/home/ilge/dev/pymill/bin:/misc/lmbraid17/sceneflownet/common/programs/torch/install/bin:/misc/lmbraid17/sceneflownet/common/software-root/bin:/misc/lmbraid17/sceneflownet/ilge/hackathon-caffe2/python/pymill/bin:/misc/software-lin/Qt-5.3.2/5.3/gcc_64/bin:/home/ilge/bin:/home/ilge/dev/pymill/bin:/misc/lmbraid17/sceneflownet/common/programs/torch/install/bin:/misc/lmbraid17/sceneflownet/common/software-root/bin:/misc/lmbraid17/sceneflownet/ilge/hackathon-caffe2/python/pymill/bin:/misc/software-lin/Qt-5.3.2/5.3/gcc_64/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/games:/usr/local/games:/misc/software-lin/lmbsoft/cuda-6.5.14-x86_64/bin:/misc/software-lin/matlabR2013a/bin:/home/ilge/data/caffe/matching/bin:/misc/lmbraid15/hackathon/common/flo-results/bin:/misc/lmbraid17/sceneflownet/common/data_tools:/misc/software-lin/lmbsoft/cuda-6.5.14-x86_64/bin:/misc/software-lin/matlabR2013a/bin:/home/ilge/data/caffe/matching/bin:/misc/lmbraid15/hackathon/common/flo-results/bin:/misc/lmbraid17/sceneflownet/common/data_tools"),
        )
        for variable, value in environmentOverrides:
            os.environ[variable] = value

        tb.system('/home/ilge/bin/weight-viewer %s %s' % (prototxtPath, snapshotPath))
Exemplo n.º 3
0
    def runProto(self, proto):
        defFile = proto
        modelFile, iter = self._env.getModelFile(self._iter)

        print 'testing for iteration %d ...' % self._iter

        if self._output:
            dir = 'output_%s_%d' % (self._name, self._iter)
            tb.system('mkdir -p %s' % dir)
            self._variables['TEST_OUTPUT'] = 1
            self._variables['TEST_OUTPUT_DIR'] = '"\\"%s\\""' % dir

        self._env.makeScratchDir()
        defPrototxt = self._env.prototxt(defFile, 'scratch', self._variables)
        print defFile, defPrototxt
        tb.system('%s test -weights %s -model %s -gpu 0 -iterations %d 2>&1' % (Environment.caffeBin(), modelFile, defPrototxt, self._iterations))
Exemplo n.º 4
0
    def runProto(self, proto):
        defFile = proto
        modelFile, iter = self._env.getModelFile(self._iter)

        print 'testing for iteration %d ...' % self._iter

        if self._output:
            dir = 'output_%s_%d' % (self._name, self._iter)
            tb.system('mkdir -p %s' % dir)
            self._variables['TEST_OUTPUT'] = 1
            self._variables['TEST_OUTPUT_DIR'] = '"\\"%s\\""' % dir

        self._env.makeScratchDir()
        defPrototxt = self._env.prototxt(defFile, 'scratch', self._variables)
        print defFile, defPrototxt
        tb.system(
            '%s test -weights %s -model %s -gpu 0 -iterations %d 2>&1' %
            (Environment.caffeBin(), modelFile, defPrototxt, self._iterations))
Exemplo n.º 5
0
    def _callCopiedBin(self, cmd):
        """Run a caffe command using local copies of the binary and libcaffe.so.

        The binary reported by ``caffeBin()`` and the ``libcaffe.so`` it links
        against (taken from the ``ldd`` output) are copied into the current
        directory. `cmd` is then appended to the copied binary and executed
        with ``.`` prepended to ``LD_LIBRARY_PATH`` so the copied library is
        found first.

        ``GLOG_logtostderr`` is 1 unless ``self._quiet`` is set; the command
        line is announced through ``tb.notice`` unless ``self._silent`` is set.

        Raises:
            Exception: if no ``libcaffe.so => <path>.so`` line is found in the
                ``ldd`` output.
        """
        # Resolve the binary once so every step works on the same path.
        caffePath = caffeBin()
        localBin = './' + os.path.basename(caffePath)
        tb.notice('making a local copy of %s' % caffePath)
        os.system('cp %s .' % caffePath)

        lddOutput = tb.run('ldd %s' % caffePath)
        # Raw string with escaped dots: the previous non-raw pattern relied on
        # the invalid escape "\." and let "." in "libcaffe.so" match any char.
        libPattern = re.compile(r'\s*libcaffe\.so => (.*\.so)')
        caffeLib = None
        for line in lddOutput.split('\n'):
            match = libPattern.match(line)
            if match:
                caffeLib = match.group(1)
                break
        if caffeLib is None:
            raise Exception('cannot find libcaffe.so dependency')

        tb.notice('making a local copy of %s' % caffeLib)
        os.system('cp %s .' % caffeLib)

        cmd = 'GLOG_logtostderr=%d LD_LIBRARY_PATH=.:$LD_LIBRARY_PATH %s %s' % (not self._quiet, localBin, cmd)
        if not self._silent:
            tb.notice('running "%s"' % cmd, 'run')
        tb.system(cmd)
Exemplo n.º 6
0
    def _callCopiedBin(self, cmd):
        """Execute `cmd` with a private copy of the caffe binary and library.

        Copies the executable returned by ``caffeBin()`` into the current
        directory, locates its ``libcaffe.so`` dependency in the output of
        ``ldd`` and copies that too, then runs ``./<binary> <cmd>`` with
        ``LD_LIBRARY_PATH=.:$LD_LIBRARY_PATH``.

        ``GLOG_logtostderr`` is set to 1 when ``self._quiet`` is false and to
        0 otherwise. Unless ``self._silent`` is set, the final command line is
        reported via ``tb.notice`` before it is run.

        Raises:
            Exception: when the ``ldd`` output contains no line of the form
                ``libcaffe.so => <path>.so``.
        """
        # One lookup of the binary path instead of four separate calls.
        binaryPath = caffeBin()
        copiedBinary = './' + os.path.basename(binaryPath)
        tb.notice('making a local copy of %s' % binaryPath)
        os.system('cp %s .' % binaryPath)

        # The pattern is a raw string and escapes the literal dots; the old
        # non-raw form contained the invalid escape "\." and unescaped ".".
        dependencyLine = re.compile(r'\s*libcaffe\.so => (.*\.so)')
        caffeLib = None
        for lddLine in tb.run('ldd %s' % binaryPath).split('\n'):
            found = dependencyLine.match(lddLine)
            if found:
                caffeLib = found.group(1)
                break
        if caffeLib is None:
            raise Exception('cannot find libcaffe.so dependency')

        tb.notice('making a local copy of %s' % caffeLib)
        os.system('cp %s .' % caffeLib)

        cmd = 'GLOG_logtostderr=%d LD_LIBRARY_PATH=.:$LD_LIBRARY_PATH %s %s' % (
            not self._quiet, copiedBinary, cmd)
        if not self._silent:
            tb.notice('running "%s"' % cmd, 'run')
        tb.system(cmd)
Exemplo n.º 7
0
 def _callBin(self, cmd):
     """Run the caffe binary from ``caffeBin()`` with the arguments `cmd`.

     ``GLOG_logtostderr`` is 1 unless ``self._quiet`` is set. The full
     command line is reported through ``tb.notice`` unless ``self._silent``
     is set, and is then passed to ``tb.system``.
     """
     logToStderr = not self._quiet
     commandLine = 'GLOG_logtostderr=%d %s %s' % (logToStderr, caffeBin(), cmd)
     announce = not self._silent
     if announce:
         tb.notice('running "%s"' % commandLine, 'run')
     tb.system(commandLine)
Exemplo n.º 8
0
 def makeJobDir(self):
     """Create ``self._jobDir`` (and missing parents) with ``mkdir -p``."""
     createCommand = 'mkdir -p %s' % self._jobDir
     tb.system(createCommand)
Exemplo n.º 9
0
    def makeScratchDir(self):
        """Create the scratch directory and repoint ``scratch/current`` to it.

        ``self._scratchDir`` is created with ``mkdir -p``; the existing
        ``<self._path>/scratch/current`` entry is removed and recreated as a
        symbolic link to the scratch directory.
        """
        scratchDir = self._scratchDir
        tb.system('mkdir -p %s' % scratchDir)

        currentLink = '%s/scratch/current' % self._path
        tb.system('rm -f %s' % currentLink)
        tb.system('ln -s %s %s' % (scratchDir, currentLink))
Exemplo n.º 10
0
 def makeTrainDir(self):
     """Create ``self._trainDir`` and ``self._logDir`` with ``mkdir -p``."""
     # Attributes are read one at a time, in the same order as the commands.
     for attribute in ('_trainDir', '_logDir'):
         tb.system('mkdir -p %s' % getattr(self, attribute))
Exemplo n.º 11
0
def runOnCluster(env, node, gpus, background, insertLocal=True, trackJob=True):
    """Resubmit the current command line as a cluster job and exit.

    A wrapper shell script and an epilogue script are written to
    ``env.jobDir()``; the wrapper is then submitted with ``qsub`` through
    ``ssh lmbtorque``. The function never returns: it ends with
    ``sys.exit(0)``.

    Args:
        env: object providing ``params()``, ``makeJobDir()``, ``jobDir()``,
            ``name()`` and ``path()``.
        node: value for the ``nodes=`` resource; ``None`` requests ``1``.
        gpus: number of GPUs requested (formatted with ``%d``).
        background: if false, ``-I -x`` is passed to ``qsub`` and
            ``jobs/current_id`` is removed after ``qsub`` returns; if true,
            the job name is printed and output goes to ``env.jobDir()``
            (``-j oe -o``).
        insertLocal: insert ``--execute`` as first argument of the command
            that the job runs.
        trackJob: refuse to start when ``<jobDir>/current_id`` exists, and
            make the job script record its ``$PBS_JOBID`` in
            ``jobs/current_id``.

    Raises:
        Exception: if `trackJob` is set and ``<jobDir>/current_id`` exists.

    Also reads the module-level ``args.backend``; for ``'python'`` the
    absolute ``training`` directory is prepended to ``LD_LIBRARY_PATH`` and
    ``PYTHONPATH`` of the job command.
    """
    gpuArch = env.params().gpuArch()
    if node is not None:
        tb.notice(
            'Forwarding job to cluster node %s with %d gpu(s) which are of type %s'
            % (node, gpus, gpuArch), 'info')
    else:
        tb.notice(
            'Forwarding job to cluster with %d gpu(s) which are of type %s' %
            (gpus, gpuArch), 'info')

    env.makeJobDir()

    currentId = '%s/current_id' % env.jobDir()
    if trackJob and os.path.exists(currentId):
        raise Exception('%s exists, there seems to be a job already running' %
                        currentId)

    # Work on a copy: inserting into sys.argv itself would silently change
    # the process-wide argument list.
    sysargs = list(sys.argv)
    if insertLocal:
        sysargs.insert(1, '--execute')
    cmd = ' '.join(sysargs)

    if args.backend == 'python':
        training = os.path.abspath('training')
        cmd = 'LD_LIBRARY_PATH=%s:$LD_LIBRARY_PATH PYTHONPATH=%s:$PYTHONPATH %s' % (
            training, training, cmd)

    qsubCommandFile = '%s/%s-%s.sh' % (env.jobDir(), env.name().replace(
        '/', '_'), time.strftime('%d.%m.%Y-%H:%M:%S'))

    # The epilogue used to contain a literal, never-substituted "$path", so
    # it ran "cd" with an unset shell variable; fill in the real path.
    epilogueScript = '%s/epilogue.sh' % env.jobDir()
    epilogue = Template(
        '#!/bin/bash\n'
        'cd "$path"\n'
        'rm -f jobs/current_id\n').substitute(path=env.path())
    with open(epilogueScript, 'w') as epilogueFile:
        epilogueFile.write(epilogue)

    # Substitution values are inserted verbatim (no "$$" unescaping), so the
    # shell variable must be written with a single "$" here; "$$PBS_JOBID"
    # would expand to "<shell pid>PBS_JOBID" in bash.
    if trackJob:
        saveIdCommand = 'echo $PBS_JOBID > jobs/current_id'
    else:
        saveIdCommand = ''

    script = Template(
        '#!/bin/bash\n'
        '\n'
        'umask 0002\n'
        'echo -e "\\e[30;42m --- running on" `hostname` "--- \\e[0m"\n'
        'cd "$path"\n'
        '$saveIdCommand\n'
        'trap "echo got SIGHUP" SIGHUP\n'
        'trap "echo got SIGUSR1" USR1\n'
        '$command\n'
        'echo done\n'
        'rm -f jobs/current_id\n').substitute(path=env.path(),
                                              command=cmd,
                                              saveIdCommand=saveIdCommand)

    with open(qsubCommandFile, 'w') as scriptFile:
        scriptFile.write(script)
    tb.system('chmod a+x "%s"' % qsubCommandFile)

    qsub = 'qsub -l nodes=%s:gpus=%d%s,mem=%dmb,walltime=240:00:00 %s -q gpujob -d %s %s -N %s -T %s' % (
        node if node is not None else '1', gpus,
        (':' + gpuArch) if gpuArch != 'any' else '',
        env.params().requiredMemory(), '-I -x' if not background else '',
        env.path(), qsubCommandFile, env.name(), epilogueScript)

    if background:
        # Single parenthesized argument: same output as the print statement.
        print('job name: %s' % os.path.basename(qsubCommandFile))
        qsub += ' -j oe -o %s' % (env.jobDir())

    tb.notice("lmbtorque: running %s" % qsub, 'run')

    if not background:
        tb.system(
            'ssh lmbtorque "umask 0002; cd %s; %s;  rm -f jobs/current_id"' %
            (env.path(), qsub))
    else:
        tb.system('ssh lmbtorque "umask 0002; %s"' % (qsub))
    sys.exit(0)
Exemplo n.º 12
0
 def _callBin(self, cmd):
     """Invoke ``caffeBin()`` with `cmd` appended, via ``tb.system``.

     The command is prefixed with ``GLOG_logtostderr=<0|1>`` (0 when
     ``self._quiet`` is set). When ``self._silent`` is not set, the command
     line is first shown with ``tb.notice``.
     """
     prefix = 'GLOG_logtostderr=%d' % (not self._quiet)
     invocation = '%s %s %s' % (prefix, caffeBin(), cmd)
     if self._silent:
         tb.system(invocation)
         return
     tb.notice('running "%s"' % invocation, 'run')
     tb.system(invocation)
Exemplo n.º 13
0
 def makeJobDir(self):
     """Ensure the job directory ``self._jobDir`` exists (``mkdir -p``)."""
     jobDir = self._jobDir
     tb.system('mkdir -p %s' % jobDir)
Exemplo n.º 14
0
    def makeScratchDir(self):
        """Ensure ``self._scratchDir`` exists and link ``scratch/current`` to it.

        Runs three shell commands through ``tb.system``: ``mkdir -p`` for the
        scratch directory, ``rm -f`` for ``<self._path>/scratch/current`` and
        ``ln -s`` to recreate that entry pointing at the scratch directory.
        """
        target = self._scratchDir
        linkName = '%s/scratch/current' % self._path
        shellCommands = (
            'mkdir -p %s' % target,
            'rm -f %s' % linkName,
            'ln -s %s %s' % (target, linkName),
        )
        for shellCommand in shellCommands:
            tb.system(shellCommand)
Exemplo n.º 15
0
 def makeTrainDir(self):
     """Ensure the training and log directories exist (``mkdir -p`` each)."""
     mkdirFormat = 'mkdir -p %s'
     tb.system(mkdirFormat % self._trainDir)
     tb.system(mkdirFormat % self._logDir)
Exemplo n.º 16
0
    def copy(self, source, target, copySnapshot, iter):
        """Copy the entries of `source` into `target` with shell commands.

        `target` is created first. Entries are then handled as follows:

        * ``training`` is never copied as a whole. Only when `copySnapshot`
          is true are snapshot files copied into ``<target>/training``: the
          ``.caffemodel`` / ``.solverstate`` files whose ``iteration()``
          equals `iter`, or the last file of each list when `iter` is -1,
          followed by ``training/log.txt``.
        * ``scratch``, ``jobs``, ``*.pyc`` files and directories whose name
          starts with ``test_`` or ``output`` are skipped.
        * Everything else is copied with ``cp -r``.

        ``-v`` is passed to ``cp`` for ``log.txt`` and the regular entries
        unless ``self._silent`` is set; snapshot copies always use ``-v``.
        """
        tb.system('mkdir -p %s' % target)

        skippedNames = ('.', '..', 'scratch', 'jobs')
        skippedDirPrefixes = ('test_', 'output')

        for entry in os.listdir(source):
            if entry == 'training':
                if copySnapshot:
                    trainingSource = '%s/training' % source
                    trainingTarget = '%s/training' % target
                    os.system('mkdir -p %s' % trainingTarget)

                    models = iterFiles('.caffemodel', trainingSource)
                    states = iterFiles('.solverstate', trainingSource)

                    if iter == -1:
                        # No iteration requested: take the last of each list.
                        for snapshots in (models, states):
                            tb.system('cp -v %s %s' %
                                      (snapshots[-1].filename(), trainingTarget))
                    else:
                        for snapshots in (models, states):
                            for snapshot in snapshots:
                                if snapshot.iteration() == iter:
                                    tb.system('cp -v %s %s' %
                                              (snapshot.filename(), trainingTarget))

                    tb.system('cp %s %s/log.txt %s' %
                              ('' if self._silent else '-v', trainingSource, trainingTarget))
                continue

            if entry in skippedNames or entry.endswith('.pyc'):
                continue
            if entry.startswith(skippedDirPrefixes) and os.path.isdir('%s/%s' % (source, entry)):
                continue

            tb.system('cp -r %s %s/%s %s' %
                      ('' if self._silent else '-v', source, entry, target))
Exemplo n.º 17
0
def runOnCluster(env, node, gpus, background, insertLocal=True, trackJob=True):
    """Forward the running command line to the cluster via ``qsub`` and exit.

    Writes a job script and an epilogue script into ``env.jobDir()``, makes
    the job script executable and submits it with ``qsub`` over
    ``ssh lmbtorque``. Always terminates the process with ``sys.exit(0)``.

    Args:
        env: object providing ``params()``, ``makeJobDir()``, ``jobDir()``,
            ``name()`` and ``path()``.
        node: value used for ``nodes=``; ``None`` is submitted as ``1``.
        gpus: GPU count placed in the resource request (``%d``).
        background: when false the job is submitted with ``-I -x`` and
            ``jobs/current_id`` is removed once ``qsub`` returns; when true
            the job name is printed and ``-j oe -o <jobDir>`` is added.
        insertLocal: whether ``--execute`` is inserted as the first argument
            of the command executed by the job.
        trackJob: whether an existing ``<jobDir>/current_id`` aborts the
            submission and whether the job script stores ``$PBS_JOBID`` in
            ``jobs/current_id``.

    Raises:
        Exception: if `trackJob` is set and ``<jobDir>/current_id`` exists.

    The module-level ``args.backend`` is consulted: for ``"python"`` the
    absolute ``training`` directory is prepended to ``LD_LIBRARY_PATH`` and
    ``PYTHONPATH`` in the job command.
    """
    gpuArch = env.params().gpuArch()
    if node is not None:
        tb.notice(
            "Forwarding job to cluster node %s with %d gpu(s) which are of type %s" % (node, gpus, gpuArch), "info"
        )
    else:
        tb.notice("Forwarding job to cluster with %d gpu(s) which are of type %s" % (gpus, gpuArch), "info")

    env.makeJobDir()

    currentId = "%s/current_id" % env.jobDir()
    if trackJob and os.path.exists(currentId):
        raise Exception("%s exists, there seems to be a job already running" % currentId)

    # Copy the argument list; the insert below must not alter sys.argv.
    sysargs = list(sys.argv)
    if insertLocal:
        sysargs.insert(1, "--execute")
    cmd = " ".join(sysargs)

    if args.backend == "python":
        training = os.path.abspath("training")
        cmd = "LD_LIBRARY_PATH=%s:$LD_LIBRARY_PATH PYTHONPATH=%s:$PYTHONPATH %s" % (training, training, cmd)

    qsubCommandFile = "%s/%s-%s.sh" % (env.jobDir(), env.name().replace("/", "_"), time.strftime("%d.%m.%Y-%H:%M:%S"))

    # "$path" was previously written to the epilogue unsubstituted, which
    # made it "cd" to an unset shell variable; substitute the real path.
    epilogueScript = "%s/epilogue.sh" % env.jobDir()
    epilogue = Template(
        "#!/bin/bash\n"
        'cd "$path"\n'
        "rm -f jobs/current_id\n"
    ).substitute(path=env.path())
    with open(epilogueScript, "w") as epilogueFile:
        epilogueFile.write(epilogue)

    # Template values are copied verbatim, so "$$" is not collapsed inside
    # them: "$$PBS_JOBID" would reach bash as "<shell pid>PBS_JOBID".
    if trackJob:
        saveIdCommand = "echo $PBS_JOBID > jobs/current_id"
    else:
        saveIdCommand = ""

    script = Template(
        "#!/bin/bash\n"
        "\n"
        "umask 0002\n"
        'echo -e "\\e[30;42m --- running on" `hostname` "--- \\e[0m"\n'
        'cd "$path"\n'
        "$saveIdCommand\n"
        'trap "echo got SIGHUP" SIGHUP\n'
        'trap "echo got SIGUSR1" USR1\n'
        "$command\n"
        "echo done\n"
        "rm -f jobs/current_id\n"
    ).substitute(path=env.path(), command=cmd, saveIdCommand=saveIdCommand)

    with open(qsubCommandFile, "w") as scriptFile:
        scriptFile.write(script)
    tb.system('chmod a+x "%s"' % qsubCommandFile)

    qsub = "qsub -l nodes=%s:gpus=%d%s,mem=%dmb,walltime=240:00:00 %s -q gpujob -d %s %s -N %s -T %s" % (
        node if node is not None else "1",
        gpus,
        (":" + gpuArch) if gpuArch != "any" else "",
        env.params().requiredMemory(),
        "-I -x" if not background else "",
        env.path(),
        qsubCommandFile,
        env.name(),
        epilogueScript,
    )

    if background:
        # Single parenthesized argument: same output as the print statement.
        print("job name: %s" % os.path.basename(qsubCommandFile))
        qsub += " -j oe -o %s" % (env.jobDir())

    tb.notice("lmbtorque: running %s" % qsub, "run")

    if not background:
        tb.system('ssh lmbtorque "umask 0002; cd %s; %s;  rm -f jobs/current_id"' % (env.path(), qsub))
    else:
        tb.system('ssh lmbtorque "umask 0002; %s"' % (qsub))
    sys.exit(0)
Exemplo n.º 18
0
    def copy(self, source, target, copySnapshot, iter):
        """Replicate `source` in `target`, optionally with training snapshots.

        Creates `target`, then walks ``os.listdir(source)``:

        * ``training``: skipped unless `copySnapshot` is true. In that case
          ``<target>/training`` is created and receives the ``.caffemodel``
          and ``.solverstate`` files matching `iter` (or the last entry of
          each list when `iter` is -1) plus ``log.txt``.
        * ``scratch``, ``jobs``, names ending in ``.pyc`` and directories
          starting with ``test_`` or ``output``: skipped.
        * anything else: copied recursively with ``cp -r``.

        Snapshot copies always run ``cp -v``; the other copies use ``-v``
        only when ``self._silent`` is not set.
        """
        tb.system('mkdir -p %s' % target)

        for name in os.listdir(source):
            if name == 'training':
                if not copySnapshot:
                    continue

                sourceTraining = '%s/training' % source
                targetTraining = '%s/training' % target
                os.system('mkdir -p %s' % targetTraining)

                modelFiles = iterFiles('.caffemodel', sourceTraining)
                stateFiles = iterFiles('.solverstate', sourceTraining)
                copyFormat = 'cp -v %s %s'

                if iter != -1:
                    for candidate in modelFiles:
                        if candidate.iteration() == iter:
                            tb.system(copyFormat % (candidate.filename(), targetTraining))
                    for candidate in stateFiles:
                        if candidate.iteration() == iter:
                            tb.system(copyFormat % (candidate.filename(), targetTraining))
                else:
                    # -1 selects the last entry of each snapshot list.
                    tb.system(copyFormat % (modelFiles[-1].filename(), targetTraining))
                    tb.system(copyFormat % (stateFiles[-1].filename(), targetTraining))

                tb.system('cp %s %s/log.txt %s' % ('' if self._silent else '-v', sourceTraining, targetTraining))
                continue

            if name in ('.', '..', 'scratch', 'jobs'):
                continue
            if name.endswith('.pyc'):
                continue

            isDirectory = os.path.isdir('%s/%s' % (source, name))
            if isDirectory and (name.startswith('test_') or name.startswith('output')):
                continue

            tb.system('cp -r %s %s/%s %s' % ('' if self._silent else '-v', source, name, target))