예제 #1
0
파일: job.py 프로젝트: GrantRVD/neurofinder
    def clone(self):
        """
        Clone the repository given for this pull request

        Clones ``self.url`` into a fresh temporary directory, checks out
        ``self.branch`` from ``origin`` and locates the submission folder
        whose name starts with ``self.login``.

        The temporary directory is not removed here; the returned paths
        point inside it.

        Returns
        -------
        tuple of (str, str)
            ``base``, the matched ``submissions/<login>*/`` directory
            (with trailing slash), and ``module``, the ``run/`` directory
            inside it.

        Raises
        ------
        IndexError
            If no directory matches ``submissions/<login>*/`` in the clone
            (this includes the case where the clone itself failed).
        """
        d = tempfile.mkdtemp()

        with quiet():
            subprocess.call(["git", "clone", self.url, d])
            # NOTE: this changes the working directory of the whole process
            # and is kept as-is because later code may rely on it.
            os.chdir(d)
            # The exit status is intentionally not checked (as before):
            # presumably "checkout -b" fails harmlessly when the branch
            # already exists locally, e.g. for the default branch.
            subprocess.call(["git", "checkout", "-b", self.branch, "origin/%s" % self.branch])

        pattern = d + '/submissions/%s*/' % self.login
        # glob returns matches in arbitrary order; sort so the chosen
        # directory is deterministic when several folders share the prefix.
        matches = sorted(glob.glob(pattern))
        if not matches:
            # Same exception type as the bare [0] lookup raised, but with a
            # message that says what was being looked for.
            raise IndexError("no submission directory matching %s" % pattern)

        base = matches[0]
        module = base + 'run/'

        return base, module
예제 #2
0
파일: job.py 프로젝트: GrantRVD/neurofinder
    def execute(self, lock, pipe):
        """
        Execute this pull request

        Clones the submission, imports its ``run`` module, starts a Thunder
        context, scores the submission on every test data set and reports
        the outcome through ``self.send_message`` and ``pipe``.

        Parameters
        ----------
        lock : object providing ``acquire()`` and ``release()``
            Held for the whole execution and always released on exit,
            including when an exception propagates.
        pipe : object providing ``send()``
            Receives a ``(metrics, info)`` tuple, where ``info`` is the
            parsed ``info.json`` of the submission and ``metrics`` is
            ``None`` when scoring failed.

        Raises
        ------
        Exception
            If the SPARK_HOME environment variable is unset or empty.
            Errors raised while cloning, reading ``info.json``, importing
            the submission or starting the context also propagate; in
            those cases nothing is sent on ``pipe``.
        """
        lock.acquire()
        try:
            base, module = self.clone()

            # Use a context manager so the file handle is closed promptly.
            with open(base + 'info.json', 'r') as f:
                info = json.loads(f.read())

            printer.status("Executing pull request %s from user %s"
                           % (self.id, self.login))
            printer.status("Branch name: %s" % self.branch)
            printer.status("Algorithm name: %s" % info['algorithm'])

            tsc = None
            sys.path.append(module)
            try:
                # NOTE(review): a 'run' module imported earlier in this
                # process would be served from sys.modules here - confirm
                # each job runs in a fresh process.
                run = importlib.import_module('run', module)

                spark_home = os.getenv('SPARK_HOME')
                if not spark_home:
                    raise Exception('must assign the environmental variable SPARK_HOME with the location of Spark')
                sys.path.append(os.path.join(spark_home, 'python'))
                sys.path.append(os.path.join(spark_home, 'python/lib/py4j-0.8.2.1-src.zip'))

                with quiet():
                    from thunder import ThunderContext
                    from thunder.utils.launch import findThunderEgg
                    tsc = ThunderContext.start(master=self.get_master(), appName="neurofinder")
                    tsc.addPyFile(findThunderEgg())
                    log4j = tsc._sc._jvm.org.apache.log4j
                    log4j.LogManager.getRootLogger().setLevel(log4j.Level.ERROR)
                    time.sleep(5)

                try:
                    metrics = self._score_datasets(tsc, run)
                    msg = "Execution successful"
                    printer.success()
                    self.update_status("executed")
                except Exception:
                    # Top-level boundary for submission code: report the
                    # failure instead of crashing the worker.
                    metrics = None
                    msg = "Execution failed"
                    printer.error("failed, returning error")
                    print(traceback.format_exc())

                self.send_message(msg)
            finally:
                # Always shut the context down and undo the sys.path change,
                # even when something above raised.
                if tsc is not None:
                    tsc.stop()
                sys.path.remove(module)

            pipe.send((metrics, info))
        finally:
            # Releasing in ``finally`` keeps a failed job from leaving the
            # lock held forever.
            lock.release()

    def _score_datasets(self, tsc, run):
        """
        Run the submission on every test data set and collect its metrics

        Parameters
        ----------
        tsc : Thunder context used to load images and ground truth sources
        run : imported submission module exposing ``run(data, info=...)``

        Returns
        -------
        dict
            Maps each metric name ('score', 'recall', 'precision',
            'overlap', 'exactness') to a list with one entry per data set
            followed by an "overall" entry holding the mean value.
        """
        base_path = 'neuro.datasets.private/challenges/neurofinder.test'
        datasets = ['00.00.test', '00.01.test', '01.00.test', '01.01.test',
                    '02.00.test', '02.01.test', '03.00.test']

        metrics = {'score': [], 'recall': [], 'precision': [], 'overlap': [], 'exactness': []}

        for name in datasets:

            printer.status("Proccessing data set %s" % name)

            data_path = 's3n://' + base_path + '/' + name
            data_info = self.load_info(base_path, name)
            data = tsc.loadImages(data_path + '/images/', recursive=True,
                                  npartitions=600)
            truth = tsc.loadSources(data_path + '/sources/sources.json')
            sources = run.run(data, info=data_info)

            threshold = 6.0 / data_info['pixels-per-micron']

            recall, precision, score = truth.similarity(sources, metric='distance', minDistance=threshold)

            stats = truth.overlap(sources, method='rates', minDistance=threshold)
            # Fall back to fixed values when every rate is NaN.
            # NOTE(review): presumably ``sum`` here is numpy's - confirm.
            if sum(~isnan(stats)) > 0:
                overlap, exactness = tuple(nanmean(stats, axis=0))
            else:
                overlap, exactness = 0.0, 1.0

            details = {"dataset": name,
                       "contributors": str(", ".join(data_info["contributors"])),
                       "lab": data_info["lab"],
                       "region": data_info["region"],
                       "animal": data_info["animal"]}

            values = (('score', score), ('recall', recall),
                      ('precision', precision), ('overlap', overlap),
                      ('exactness', exactness))
            for key, value in values:
                entry = {"value": value}
                entry.update(details)
                metrics[key].append(entry)

            # Outline the detected sources on the mean image, clipped at
            # its 99.9th percentile, and post the result.
            mean_image = data.mean()
            im = sources.masks(outline=True,
                               base=mean_image.clip(0, percentile(mean_image, 99.9)))
            self.post_image(im, name)

        for key in metrics:
            overall = mean([v['value'] for v in metrics[key]])
            # Include "lab" so the summary entry carries the same keys as
            # the per-dataset entries.
            metrics[key].append({"dataset": "overall", "value": overall,
                                 "contributors": "", "lab": "",
                                 "region": "", "animal": ""})

        return metrics