def clone(self):
    """
    Clone the repository given for this pull request

    The repository at ``self.url`` is cloned into a new temporary
    directory, the process working directory is switched to that
    directory, and a local branch named ``self.branch`` is checked out
    from ``origin/<branch>``. Return codes of the git commands are not
    inspected.

    Returns
    -------
    tuple
        ``(base, module)`` where ``base`` is the first path matching
        ``<tempdir>/submissions/<login>*/`` and ``module`` is
        ``base + 'run/'``.

    Raises
    ------
    IndexError
        If no directory matches the submissions pattern.
    """
    workdir = tempfile.mkdtemp()

    # git output is suppressed while cloning and checking out
    with quiet():
        clone_cmd = ["git", "clone", self.url, workdir]
        subprocess.call(clone_cmd)
        os.chdir(workdir)
        checkout_cmd = ["git", "checkout", "-b", self.branch, "origin/%s" % self.branch]
        subprocess.call(checkout_cmd)

    pattern = workdir + '/submissions/%s*/' % self.login
    matches = glob.glob(pattern)
    base = matches[0]
    return base, base + 'run/'
def execute(self, lock, pipe):
    """
    Execute this pull request

    Clones the submission, imports its ``run`` module, starts a Thunder
    context, runs the submission on every test data set, and collects
    score / recall / precision / overlap / exactness values per data set
    plus an "overall" mean for each metric. The result is sent through
    ``pipe`` as the tuple ``(metrics, info)``; ``metrics`` is ``None``
    when evaluation of the data sets failed.

    Parameters
    ----------
    lock : object with ``acquire()`` and ``release()``
        Acquired on entry and always released on exit, including when
        an exception propagates out of this method.
    pipe : object with ``send()``
        Receives ``(metrics, info)`` once execution has finished.

    Raises
    ------
    Exception
        If the SPARK_HOME environment variable is unset or empty.
        Errors from cloning, reading ``info.json`` or starting the
        context also propagate; errors raised while processing the
        data sets are caught and reported as a failed execution.
    """
    lock.acquire()
    try:
        base, module = self.clone()

        # Context manager so the metadata file handle is not leaked.
        with open(base + 'info.json', 'r') as f:
            info = json.loads(f.read())

        printer.status("Executing pull request %s from user %s" % (self.id, self.login))
        printer.status("Branch name: %s" % self.branch)
        printer.status("Algorithm name: %s" % info['algorithm'])

        # Make the submission's run module importable.
        sys.path.append(module)
        run = importlib.import_module('run', module)

        spark_home = os.getenv('SPARK_HOME')
        if not spark_home:
            raise Exception('must assign the environmental variable SPARK_HOME with the location of Spark')
        sys.path.append(os.path.join(spark_home, 'python'))
        sys.path.append(os.path.join(spark_home, 'python/lib/py4j-0.8.2.1-src.zip'))

        with quiet():
            # Imported here because thunder only becomes importable after
            # the Spark python paths above have been added.
            from thunder import ThunderContext
            from thunder.utils.launch import findThunderEgg
            tsc = ThunderContext.start(master=self.get_master(), appName="neurofinder")
            tsc.addPyFile(findThunderEgg())
            log4j = tsc._sc._jvm.org.apache.log4j
            log4j.LogManager.getRootLogger().setLevel(log4j.Level.ERROR)
            time.sleep(5)

        try:
            base_path = 'neuro.datasets.private/challenges/neurofinder.test'
            datasets = ['00.00.test', '00.01.test', '01.00.test', '01.01.test',
                        '02.00.test', '02.01.test', '03.00.test']
            metric_names = ['score', 'recall', 'precision', 'overlap', 'exactness']
            metrics = {'score': [], 'recall': [], 'precision': [], 'overlap': [], 'exactness': []}

            try:
                for name in datasets:
                    printer.status("Proccessing data set %s" % name)

                    data_path = 's3n://' + base_path + '/' + name
                    data_info = self.load_info(base_path, name)
                    data = tsc.loadImages(data_path + '/images/', recursive=True, npartitions=600)
                    truth = tsc.loadSources(data_path + '/sources/sources.json')
                    sources = run.run(data, info=data_info)

                    threshold = 6.0 / data_info['pixels-per-micron']
                    recall, precision, score = truth.similarity(sources, metric='distance', minDistance=threshold)
                    stats = truth.overlap(sources, method='rates', minDistance=threshold)
                    if sum(~isnan(stats)) > 0:
                        overlap, exactness = tuple(nanmean(stats, axis=0))
                    else:
                        # No non-NaN overlap statistics: fall back to fixed values.
                        overlap, exactness = 0.0, 1.0

                    details = {
                        "dataset": name,
                        "contributors": str(", ".join(data_info["contributors"])),
                        "lab": data_info["lab"],
                        "region": data_info["region"],
                        "animal": data_info["animal"],
                    }
                    values = {'score': score, 'recall': recall, 'precision': precision,
                              'overlap': overlap, 'exactness': exactness}
                    for key in metric_names:
                        entry = {"value": values[key]}
                        entry.update(details)
                        metrics[key].append(entry)

                    mean_image = data.mean()
                    im = sources.masks(outline=True, base=mean_image.clip(0, percentile(mean_image, 99.9)))
                    self.post_image(im, name)

                # Append the mean over all data sets as an "overall" entry.
                for k in metrics:
                    overall = mean([v['value'] for v in metrics[k]])
                    metrics[k].append({"dataset": "overall", "value": overall,
                                       "contributors": "", "region": "", "animal": ""})

                msg = "Execution successful"
                printer.success()
                self.update_status("executed")

            except Exception:
                # Evaluation failures are reported instead of propagated so
                # the result is still delivered through the pipe below.
                metrics = None
                msg = "Execution failed"
                printer.error("failed, returning error")
                print(traceback.format_exc())

            self.send_message(msg)
        finally:
            # Stop the context and restore sys.path even if reporting fails.
            tsc.stop()
            sys.path.remove(module)

        pipe.send((metrics, info))
    finally:
        # Always release, otherwise a failed run blocks every later one.
        lock.release()