def send_message(self, msg):
    """ Send a message to the github comment """
    printer.status("Sending msg to github: '%s'" % msg)
    self.pull_req.create_issue_comment(msg)
    printer.success()
def isentry(self):
    """ Check if the pull request is in the collection, otherwise add it """
    entry = self.collection.find_one({"id": self.id})
    if not entry:
        printer.status("Adding entry for pull request %s from user %s" % (self.id, self.login))
        payload = {'id': self.id, 'login': self.login, 'validated': False, 'executed': False,
                   'last_checked': 0, 'validated_at': 0, 'executed_at': 0}
        self.collection.insert_one(payload)
def validate(self):
    """ Validate this pull request """
    printer.status("Validating pull request %s from user %s" % (self.id, self.login))
    base, module = self.clone()
    validated = True
    errors = ""
    if not self.ismergeable():
        validated = False
        errors += "Submission cannot be merged\n"
    if not os.path.isdir(base):
        validated = False
        errors += "Missing directory submissions/%s\n" % self.login
    if not os.path.isfile(base + 'info.json'):
        validated = False
        errors += "Missing info.json\n"
    else:
        try:
            with open(base + 'info.json', 'r') as f:
                json.loads(f.read())
        except IOError:
            validated = False
            errors += "Cannot find info.json file\n"
        except ValueError:
            validated = False
            errors += "Error parsing info.json file\n"
    if not os.path.isfile(module + 'run.py'):
        validated = False
        errors += "Missing run.py\n"
    if not os.path.isfile(module + '__init__.py'):
        validated = False
        errors += "Missing __init__.py\n"
    else:
        try:
            sys.path.append(module)
            importlib.import_module('run')
        except ImportError:
            validated = False
            errors += "Cannot import run from run.py\n"
    if validated:
        msg = "Validation successful"
        printer.success()
        self.update_status("validated")
    else:
        msg = "Validation failed:\n" + errors
        printer.error()
    self.send_message(msg)
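# A minimal sketch of a submission that passes validate(), assuming the layout
# implied above: a 'base' directory (submissions/<login>/) holding info.json,
# and a 'module' directory containing __init__.py and run.py. Only the
# 'algorithm' key of info.json is read by this class (in execute() below);
# any other fields a submission carries are not part of a confirmed schema.
#
#     submissions/<login>/info.json
#         {"algorithm": "example-blob-detector"}      # hypothetical name
#
#     <module>/__init__.py    (may be empty)
#     <module>/run.py         (must define run(); see the sketch at the end)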
def execute(self, lock, pipe):
    """ Execute this pull request """
    lock.acquire()
    base, module = self.clone()
    with open(base + 'info.json', 'r') as f:
        info = json.loads(f.read())
    printer.status("Executing pull request %s from user %s" % (self.id, self.login))
    printer.status("Branch name: %s" % self.branch)
    printer.status("Algorithm name: %s" % info['algorithm'])
    sys.path.append(module)
    run = importlib.import_module('run', module)
    spark_home = os.getenv('SPARK_HOME')
    if spark_home is None or spark_home == '':
        raise Exception('must assign the environment variable SPARK_HOME with the location of Spark')
    sys.path.append(os.path.join(spark_home, 'python'))
    sys.path.append(os.path.join(spark_home, 'python/lib/py4j-0.8.2.1-src.zip'))
    with quiet():
        from thunder import ThunderContext
        from thunder.utils.launch import findThunderEgg
        tsc = ThunderContext.start(master=self.get_master(), appName="neurofinder")
        tsc.addPyFile(findThunderEgg())
        log4j = tsc._sc._jvm.org.apache.log4j
        log4j.LogManager.getRootLogger().setLevel(log4j.Level.ERROR)
        time.sleep(5)
    base_path = 'neuro.datasets.private/challenges/neurofinder.test'
    datasets = ['00.00.test', '00.01.test', '01.00.test', '01.01.test',
                '02.00.test', '02.01.test', '03.00.test']
    metrics = {'score': [], 'recall': [], 'precision': [], 'overlap': [], 'exactness': []}
    try:
        for ii, name in enumerate(datasets):
            printer.status("Processing data set %s" % name)
            data_path = 's3n://' + base_path + '/' + name
            data_info = self.load_info(base_path, name)
            data = tsc.loadImages(data_path + '/images/', recursive=True, npartitions=600)
            truth = tsc.loadSources(data_path + '/sources/sources.json')
            sources = run.run(data, info=data_info)
            threshold = 6.0 / data_info['pixels-per-micron']
            recall, precision, score = truth.similarity(sources, metric='distance', minDistance=threshold)
            stats = truth.overlap(sources, method='rates', minDistance=threshold)
            if sum(~isnan(stats)) > 0:
                overlap, exactness = tuple(nanmean(stats, axis=0))
            else:
                overlap, exactness = 0.0, 1.0
            contributors = str(", ".join(data_info["contributors"]))
            animal = data_info["animal"]
            region = data_info["region"]
            lab = data_info["lab"]
            # shared per-dataset metadata merged into each metric record
            base = {"dataset": name, "contributors": contributors,
                    "lab": lab, "region": region, "animal": animal}
            m = {"value": score}
            m.update(base)
            metrics['score'].append(m)
            m = {"value": recall}
            m.update(base)
            metrics['recall'].append(m)
            m = {"value": precision}
            m.update(base)
            metrics['precision'].append(m)
            m = {"value": overlap}
            m.update(base)
            metrics['overlap'].append(m)
            m = {"value": exactness}
            m.update(base)
            metrics['exactness'].append(m)
            base = data.mean()
            im = sources.masks(outline=True, base=base.clip(0, percentile(base, 99.9)))
            self.post_image(im, name)
        for k in metrics.keys():
            overall = mean([v['value'] for v in metrics[k]])
            metrics[k].append({"dataset": "overall", "value": overall,
                               "contributors": "", "region": "", "animal": ""})
        msg = "Execution successful"
        printer.success()
        self.update_status("executed")
    except Exception:
        metrics = None
        msg = "Execution failed"
        printer.error("failed, returning error")
        print(traceback.format_exc())
    self.send_message(msg)
    tsc.stop()
    sys.path.remove(module)
    pipe.send((metrics, info))
    lock.release()
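# A hedged sketch of how this execute(lock, pipe) variant might be driven from
# a parent process. The actual scheduler is not shown in this file, so the
# Process/Pipe wiring below is an assumption based only on the method's
# signature (it acquires the lock, runs, and sends (metrics, info) back):
#
#     from multiprocessing import Process, Lock, Pipe
#
#     lock = Lock()
#     parent_conn, child_conn = Pipe()
#     p = Process(target=pr.execute, args=(lock, child_conn))  # pr: a PullRequest-like object
#     p.start()
#     metrics, info = parent_conn.recv()  # blocks until pipe.send() above fires
#     p.join()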
def execute(self):
    """ Execute this pull request """
    printer.status("Executing pull request %s from user %s" % (self.id, self.login))
    base, module = self.clone()
    with open(base + 'info.json', 'r') as f:
        info = json.loads(f.read())
    sys.path.append(module)
    run = importlib.import_module('run')
    spark = os.getenv('SPARK_HOME')
    if spark is None or spark == '':
        raise Exception('must assign the environment variable SPARK_HOME with the location of Spark')
    sys.path.append(os.path.join(spark, 'python'))
    sys.path.append(os.path.join(spark, 'python/lib/py4j-0.8.2.1-src.zip'))
    from thunder import ThunderContext
    tsc = ThunderContext.start(master="local", appName="neurofinder")
    datasets = ['data-0', 'data-1', 'data-2', 'data-3', 'data-4', 'data-5']
    centers = [5, 7, 9, 11, 13, 15]
    metrics = {'accuracy': [], 'overlap': [], 'distance': [], 'count': [], 'area': []}
    try:
        for ii, name in enumerate(datasets):
            data, ts, truth = tsc.makeExample('sources', dims=(200, 200),
                                              centers=centers[ii], noise=1.0, returnParams=True)
            sources = run.run(data)
            accuracy = truth.similarity(sources, metric='distance', thresh=10, minDistance=10)
            overlap = truth.overlap(sources, minDistance=10)
            distance = truth.distance(sources, minDistance=10)
            count = sources.count
            area = mean(sources.areas)
            metrics['accuracy'].append({"dataset": name, "value": accuracy})
            metrics['overlap'].append({"dataset": name, "value": nanmean(overlap)})
            metrics['distance'].append({"dataset": name, "value": nanmean(distance)})
            metrics['count'].append({"dataset": name, "value": count})
            metrics['area'].append({"dataset": name, "value": area})
            im = sources.masks(base=data.mean())
            self.post_image(im, name)
        for k in metrics.keys():
            overall = mean([v['value'] for v in metrics[k]])
            metrics[k].append({"dataset": "overall", "value": overall})
        msg = "Execution successful"
        printer.success()
        self.update_status("executed")
    except Exception:
        metrics = None
        msg = "Execution failed"
        printer.error("failed, returning error")
        print(traceback.format_exc())
    self.send_message(msg)
    return metrics, info
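# A minimal sketch of the run.py module both execute() variants import. The
# cluster variant calls run.run(data, info=data_info) and the local variant
# calls run.run(data), so a single entry point with an optional keyword
# satisfies both; the body is an illustrative stub, not a real algorithm.
#
#     # run.py
#     def run(data, info=None):
#         """ Identify sources in the given Images data.
#
#         Must return a SourceModel-like object, since the harness calls
#         truth.similarity(sources, ...) and sources.masks(...) on the result.
#         """
#         ...  # submission-specific source-extraction algorithm goes here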