Exemplo n.º 1
0
 def send_message(self, msg):
     """
     Post ``msg`` as a comment on the GitHub pull request.

     The message is logged through ``printer`` before it is posted, and
     success is reported once the comment call has returned.
     """
     status_line = "Sending msg to github: '%s'" % msg
     printer.status(status_line)
     self.pull_req.create_issue_comment(msg)
     printer.success()
Exemplo n.º 2
0
 def isentry(self):
     """
     Make sure this pull request has a record in the collection.

     The pull request is looked up by its id. When no record is found, a
     new one is inserted with ``validated`` and ``executed`` set to False
     and ``last_checked``, ``validated_at`` and ``executed_at`` set to 0.
     """
     if self.collection.find_one({"id": self.id}):
         # Already tracked, nothing to add.
         return

     printer.status("Adding entry for pull request %s from user %s" % (self.id, self.login))
     record = {
         'id': self.id,
         'login': self.login,
         'validated': False,
         'executed': False,
         'last_checked': 0,
         'validated_at': 0,
         'executed_at': 0,
     }
     self.collection.insert_one(record)
Exemplo n.º 3
0
    def validate(self):
        """
        Validate this pull request.

        Clones the submission and checks that:

        - the pull request can be merged,
        - the submission directory exists,
        - ``info.json`` is present and parses as JSON,
        - ``run.py`` and ``__init__.py`` are present in the module directory,
        - ``run`` can be imported (only attempted when ``__init__.py`` exists).

        On success the status is updated to "validated". In both cases the
        outcome, including one line per problem found, is sent as a comment.
        """
        printer.status("Validating pull request %s from user %s" % (self.id, self.login))

        base, module = self.clone()

        # One entry per problem; an empty list means the submission is valid.
        errors = []

        if not self.ismergeable():
            errors.append("Submission cannot be merged")

        if not os.path.isdir(base):
            errors.append("Missing directory submissions/%s" % self.login)

        info_path = base + 'info.json'
        if not os.path.isfile(info_path):
            errors.append("Missing info.json")
        else:
            try:
                # Context manager so the handle is closed even when parsing fails.
                with open(info_path, 'r') as f:
                    json.load(f)
            except IOError:
                errors.append("Cannot find info.json file")
            except ValueError:
                errors.append("Error parsing info.json file")

        if not os.path.isfile(module + 'run.py'):
            errors.append("Missing run.py")

        if not os.path.isfile(module + '__init__.py'):
            errors.append("Missing __init__.py")
        else:
            sys.path.append(module)
            try:
                # import_module returns whatever is cached in sys.modules, so a
                # 'run' left over from another submission would hide a broken
                # or missing run.py here. Drop it to force a real import.
                sys.modules.pop('run', None)
                importlib.import_module('run')
            except ImportError:
                errors.append("Cannot import run from run.py")
            finally:
                # Do not leave the submission directory on the import path.
                if module in sys.path:
                    sys.path.remove(module)

        if not errors:
            msg = "Validation successful"
            printer.success()
            self.update_status("validated")
        else:
            msg = "Validation failed:\n" + "".join(line + "\n" for line in errors)
            printer.error()

        self.send_message(msg)
Exemplo n.º 4
0
    def execute(self, lock, pipe):
        """
        Execute this pull request.

        Clones the submission, imports its ``run`` module, starts a Thunder
        context and runs the submitted algorithm on every test data set.
        The outcome is sent as a comment and ``(metrics, info)`` is sent
        through ``pipe``; ``metrics`` is None when the run failed and
        ``info`` is the parsed content of the submission's ``info.json``.

        ``lock`` is held for the whole execution and is released even when
        an error occurs. The Thunder context is stopped and the submission
        directory is removed from ``sys.path`` on every exit path.

        Raises an Exception when the SPARK_HOME environment variable is
        unset or empty.
        """
        lock.acquire()
        try:
            base, module = self.clone()

            # Context manager so the handle is not leaked.
            with open(base + 'info.json', 'r') as f:
                info = json.load(f)

            printer.status("Executing pull request %s from user %s"
                           % (self.id, self.login))
            printer.status("Branch name: %s" % self.branch)
            printer.status("Algorithm name: %s" % info['algorithm'])

            sys.path.append(module)
            tsc = None
            try:
                # import_module returns whatever is cached in sys.modules; drop
                # any stale 'run' so this submission's code is the one loaded.
                sys.modules.pop('run', None)
                run = importlib.import_module('run', module)

                spark_home = os.getenv('SPARK_HOME')
                if not spark_home:
                    raise Exception('must assign the environmental variable SPARK_HOME with the location of Spark')
                sys.path.append(os.path.join(spark_home, 'python'))
                sys.path.append(os.path.join(spark_home, 'python/lib/py4j-0.8.2.1-src.zip'))

                with quiet():
                    from thunder import ThunderContext
                    from thunder.utils.launch import findThunderEgg
                    tsc = ThunderContext.start(master=self.get_master(), appName="neurofinder")
                    tsc.addPyFile(findThunderEgg())
                    log4j = tsc._sc._jvm.org.apache.log4j
                    log4j.LogManager.getRootLogger().setLevel(log4j.Level.ERROR)
                    time.sleep(5)

                try:
                    metrics = self._evaluate_datasets(tsc, run)
                    msg = "Execution successful"
                    printer.success()
                    self.update_status("executed")
                except Exception:
                    # Any failure in the submitted code is reported, not raised.
                    metrics = None
                    msg = "Execution failed"
                    printer.error("failed, returning error")
                    print(traceback.format_exc())

                self.send_message(msg)
            finally:
                if tsc is not None:
                    tsc.stop()
                if module in sys.path:
                    sys.path.remove(module)

            pipe.send((metrics, info))
        finally:
            # Always release, otherwise one failed run blocks every later one.
            lock.release()

    def _evaluate_datasets(self, tsc, run):
        """
        Run the submitted algorithm on each test data set and collect metrics.

        Returns a dict mapping each metric name to a list with one entry per
        data set followed by an "overall" entry holding the mean value. An
        outline image is posted for every data set.
        """
        base_path = 'neuro.datasets.private/challenges/neurofinder.test'
        datasets = ['00.00.test', '00.01.test', '01.00.test', '01.01.test',
                    '02.00.test', '02.01.test', '03.00.test']

        metrics = {'score': [], 'recall': [], 'precision': [], 'overlap': [], 'exactness': []}

        for name in datasets:

            printer.status("Proccessing data set %s" % name)

            data_path = 's3n://' + base_path + '/' + name
            data_info = self.load_info(base_path, name)
            data = tsc.loadImages(data_path + '/images/', recursive=True,
                                  npartitions=600)
            truth = tsc.loadSources(data_path + '/sources/sources.json')
            sources = run.run(data, info=data_info)

            threshold = 6.0 / data_info['pixels-per-micron']

            recall, precision, score = truth.similarity(sources, metric='distance', minDistance=threshold)

            stats = truth.overlap(sources, method='rates', minDistance=threshold)
            if sum(~isnan(stats)) > 0:
                overlap, exactness = tuple(nanmean(stats, axis=0))
            else:
                # No usable overlap statistics: fall back to fixed values.
                overlap, exactness = 0.0, 1.0

            # Fields shared by every metric entry of this data set.
            common = {"dataset": name,
                      "contributors": str(", ".join(data_info["contributors"])),
                      "lab": data_info["lab"],
                      "region": data_info["region"],
                      "animal": data_info["animal"]}

            values = (('score', score), ('recall', recall), ('precision', precision),
                      ('overlap', overlap), ('exactness', exactness))
            for key, value in values:
                entry = {"value": value}
                entry.update(common)
                metrics[key].append(entry)

            mean_image = data.mean()
            im = sources.masks(outline=True, base=mean_image.clip(0, percentile(mean_image, 99.9)))
            self.post_image(im, name)

        for k in metrics.keys():
            overall = mean([v['value'] for v in metrics[k]])
            # NOTE(review): unlike the per-data-set entries, this one carries
            # no "lab" key - confirm whether consumers expect it.
            metrics[k].append({"dataset": "overall", "value": overall,
                               "contributors": "", "region": "", "animal": ""})

        return metrics
Exemplo n.º 5
0
    def execute(self):
        """
        Execute this pull request.

        Clones the submission, imports its ``run`` module, starts a local
        Thunder context and runs the submitted algorithm on six generated
        example data sets. Accuracy, overlap, distance, count and area are
        collected per data set, followed by an "overall" mean entry. An
        image is posted for every data set and the outcome is sent as a
        comment.

        Returns the tuple ``(metrics, info)``; ``metrics`` is None when the
        run failed and ``info`` is the parsed content of ``info.json``.

        Raises an Exception when the SPARK_HOME environment variable is
        unset or empty.
        """
        printer.status("Executing pull request %s from user %s" % (self.id, self.login))

        base, module = self.clone()

        # Context manager so the handle is not leaked.
        with open(base + 'info.json', 'r') as f:
            info = json.load(f)

        sys.path.append(module)
        try:
            # import_module returns whatever is cached in sys.modules; drop any
            # stale 'run' so this submission's code is the one loaded.
            sys.modules.pop('run', None)
            run = importlib.import_module('run')

            spark = os.getenv('SPARK_HOME')
            if not spark:
                raise Exception('must assign the environmental variable SPARK_HOME with the location of Spark')
            sys.path.append(os.path.join(spark, 'python'))
            sys.path.append(os.path.join(spark, 'python/lib/py4j-0.8.2.1-src.zip'))

            from thunder import ThunderContext
            tsc = ThunderContext.start(master="local", appName="neurofinder")

            try:
                datasets = ['data-0', 'data-1', 'data-2', 'data-3', 'data-4', 'data-5']
                centers = [5, 7, 9, 11, 13, 15]
                metrics = {'accuracy': [], 'overlap': [], 'distance': [], 'count': [], 'area': []}

                try:
                    for name, ncenters in zip(datasets, centers):
                        data, _, truth = tsc.makeExample('sources', dims=(200, 200),
                                                         centers=ncenters, noise=1.0,
                                                         returnParams=True)
                        sources = run.run(data)

                        accuracy = truth.similarity(sources, metric='distance', thresh=10, minDistance=10)
                        overlap = truth.overlap(sources, minDistance=10)
                        distance = truth.distance(sources, minDistance=10)
                        count = sources.count
                        area = mean(sources.areas)

                        metrics['accuracy'].append({"dataset": name, "value": accuracy})
                        metrics['overlap'].append({"dataset": name, "value": nanmean(overlap)})
                        metrics['distance'].append({"dataset": name, "value": nanmean(distance)})
                        metrics['count'].append({"dataset": name, "value": count})
                        metrics['area'].append({"dataset": name, "value": area})

                        im = sources.masks(base=data.mean())
                        self.post_image(im, name)

                    for k in metrics.keys():
                        overall = mean([v['value'] for v in metrics[k]])
                        metrics[k].append({"dataset": "overall", "value": overall})

                    msg = "Execution successful"
                    printer.success()
                    self.update_status("executed")

                except Exception:
                    # Any failure in the submitted code is reported, not raised.
                    metrics = None
                    msg = "Execution failed"
                    printer.error("failed, returning error")
                    print(traceback.format_exc())

                self.send_message(msg)
            finally:
                # Stop the context so a later execution can start its own.
                tsc.stop()
        finally:
            # Do not leave the submission directory on the import path.
            if module in sys.path:
                sys.path.remove(module)

        return metrics, info