示例#1
0
class SingleEvaluation(luigi.WrapperTask):
    """Wrapper task that requests the evaluation tasks for one iteration.

    For every entry of ``data_eval`` it requires one ``SplitModi`` task for
    the samples whose name contains ``'+'`` and three ``PartnerReport`` tasks
    ('groundtruth', 'validation', 'training') for the remaining samples.
    """
    iteration = luigi.IntParameter(default=186000)
    path = luigi.Parameter(
        default=
        '/nrs/saalfeld/heinrichl/synapses/pre_and_post/pre_and_post-v9.0/run01/'
    )
    data_eval = luigi.TupleParameter(default=('data2016-aligned',
                                              'data2016-unaligned'))
    samples = luigi.TupleParameter(default=('A', 'B', 'C', 'A+', 'B+', 'C+'))

    def requires(self):
        """Yield the required tasks, grouped per evaluation dataset."""
        # A group is only requested when at least one of its three known
        # sample names is present in ``samples``.
        wants_plus = any(n in self.samples for n in ('A+', 'B+', 'C+'))
        wants_plain = any(n in self.samples for n in ('A', 'B', 'C'))

        # The split itself is done on the '+' marker, not on the known names.
        plus_samples = tuple(s for s in self.samples if '+' in s)
        plain_samples = tuple(s for s in self.samples if '+' not in s)

        for de in self.data_eval:
            if wants_plus:
                yield SplitModi(self.iteration, self.path, de, 'groundtruth',
                                plus_samples, self.data_eval)
            if wants_plain:
                for mode in ('groundtruth', 'validation', 'training'):
                    yield PartnerReport(self.iteration, self.path, de, mode,
                                        plain_samples, self.data_eval)
示例#2
0
class SgovCompaniesParse(luigi.Task):
    """Parse each input file into ``CompanieRow`` rows and save them as CSV rows."""

    sheets = luigi.TupleParameter(default=None)
    skiptop = luigi.TupleParameter(default=None)
    usecolumns = luigi.TupleParameter(default=None)

    def output(self):
        """Return the local CSV target; its file name carries today's date."""
        csv_path = build_file_path(self.directory,
                                   self.name,
                                   '.csv',
                                   suff=today(FILENAME_DATE_FORMAT))
        return luigi.LocalTarget(csv_path)

    def run(self):
        """Parse every input target and hand its rows to ``save_csvrows``."""
        targets = self.input()
        total = len(targets)
        for position, target in enumerate(targets, start=1):
            self.set_status_message('Parsing {}'.format(target.path))
            parsed = parse(target.path,
                           CompanieRow,
                           skiprows=self.skiptop,
                           sheets=self.sheets)
            # Rows are attrs instances; they are flattened to plain tuples.
            as_tuples = [attr.astuple(row) for row in parsed]
            save_csvrows(self.output().path, as_tuples)

            # Report how many of the inputs have been processed so far.
            self.set_progress_percentage(round(position * 100 / total))
示例#3
0
class LeaveOneOutEvaluation(luigi.Task):
    """Evaluate every leave-one-out prediction against the home locations.

    One ``LeaveOneOutPrediction`` is required per (edgetype, method) pair; the
    output of ``snlocest.scripts.evaluate_prf`` for each of them is appended
    to a single TSV file.
    """
    name = luigi.Parameter()
    edgetypes = luigi.TupleParameter(default=('linked', 'mutual', 'followee',
                                              'follower'))
    methods = luigi.TupleParameter(default=('mv', 'gm', 'pm', 'rn'))

    def requires(self):
        """Yield the ground truth first, then one prediction per combination."""
        yield HomeLocation(name=self.name)
        for edgetype in self.edgetypes:
            for method in self.methods:
                yield LeaveOneOutPrediction(name=self.name,
                                            edgetype=edgetype,
                                            method=method)

    def output(self):
        """Return the TSV target named after ``name``."""
        output_path = os.path.join('data/experiments/loocv/evaluation',
                                   '{}.tsv'.format(self.name))
        return luigi.LocalTarget(output_path)

    def run(self):
        """Run the evaluation script once per prediction, appending results."""
        # Local import so this block does not depend on the file's import list.
        import shlex

        inputs = self.input()
        # ``requires`` yields the ground truth first, the predictions after it.
        truth, predictions = inputs[0], inputs[1:]
        cmd = 'python -m snlocest.scripts.evaluate_prf {} {} >> {}'
        with self.output().temporary_path() as temp_output_path:
            for result in predictions:
                # Quote every path: the command goes through the shell, so
                # whitespace or metacharacters in a path would otherwise
                # split or alter the command line.
                run(cmd.format(shlex.quote(truth.path),
                               shlex.quote(result.path),
                               shlex.quote(temp_output_path)),
                    shell=True,
                    check=True)
示例#4
0
class Crop(luigi.Task):
    """Crop the prediction datasets of every sample to a fixed bounding box.

    For each sample the datasets 'clefts', 'pre_dist' and 'post_dist' of the
    sample's ``.n5`` container are copied, restricted to the box given by the
    module-level ``offsets`` and ``shapes`` tables, into ``*_cropped``
    datasets of the same container. An empty ``crop.msg`` file marks
    completion.
    """
    it = luigi.IntParameter()
    path = luigi.Parameter()
    de = luigi.Parameter()
    samples = luigi.TupleParameter()
    data_eval = luigi.TupleParameter()
    resources = {'ram': 50}

    @property
    def priority(self):
        """Return ``1 / it`` for iterations divisible by 10000, else ``0``."""
        if int(self.it) % 10000 == 0:
            return 1. / int(self.it)
        else:
            return 0.

    def requires(self):
        """Depend on the prediction for the same iteration and samples."""
        return Predict(self.it, self.path, self.samples, self.data_eval)

    def output(self):
        """Return the marker file next to the prediction, under ``de``."""
        return luigi.LocalTarget(
            os.path.join(os.path.dirname(self.input()[0].fn), self.de,
                         'crop.msg'))

    def run(self):
        """Write the cropped datasets and then create the marker file."""
        progress = 0.
        self.set_progress_percentage(progress)
        # The offset/shape tables are indexed by sample, then by this flag.
        aligned = 'unaligned' not in self.de
        datasets_src = ['clefts', 'pre_dist', 'post_dist']
        datasets_tgt = [
            'clefts_cropped', 'pre_dist_cropped', 'post_dist_cropped'
        ]
        for s in self.samples:
            filename = os.path.join(os.path.dirname(self.input()[0].fn),
                                    self.de, s + '.n5')
            off = offsets[s][aligned]
            sh = shapes[s][aligned]
            f = z5py.File(filename, use_zarr_format=False)
            for dss, dst in zip(datasets_src, datasets_tgt):
                # A chunk must not be larger than the cropped volume itself.
                chunk_size = tuple(
                    min(c, shi) for c, shi in zip(f[dss].chunks, sh))
                f.create_dataset(dst,
                                 shape=sh,
                                 compression='gzip',
                                 dtype=f[dss].dtype,
                                 chunks=chunk_size)
                bb = tuple(slice(o, o + shi, None) for o, shi in zip(off, sh))
                f[dst][:] = f[dss][bb]
                # The offset attribute is stored in reversed axis order.
                f[dst].attrs['offset'] = off[::-1]

                progress += 100. / (len(self.samples) * len(datasets_src))
                try:
                    self.set_progress_percentage(progress)
                except Exception:
                    # Progress reporting is best effort, but a bare ``except``
                    # would also swallow KeyboardInterrupt and SystemExit.
                    pass

        # Create the empty marker file; the context manager closes it.
        with self.output().open('w'):
            pass
示例#5
0
class AllEvaluations(luigi.WrapperTask):
    """Wrapper task requesting a ``PartnerReport`` for every configuration.

    The configurations are the product of the iterations (multiples of
    ``iteration_step`` up to ``up_to_iteration``), training datasets,
    augmentations, evaluation datasets and modes.
    """
    up_to_iteration = luigi.IntParameter(default=200000)
    iteration_step = luigi.IntParameter(default=10000, significant=False)
    data_train = luigi.TupleParameter(default=("data2016-aligned",
                                               "data2016-unaligned"))
    data_eval = luigi.TupleParameter(default=("data2017-aligned",
                                              "data2017-unaligned"))
    augmentation = luigi.TupleParameter(default=("deluxe", "classic", "lite"))
    mode = luigi.TupleParameter(default=("validation", "training"))
    samples = luigi.TupleParameter(default=("A", "B", "C"))

    def requires(self):
        """Yield one ``PartnerReport`` per combination with ``it > 20000``."""
        step = self.iteration_step
        for it in range(step, self.up_to_iteration + step, step):
            # Iterations up to and including 20000 request nothing.
            if it <= 20000:
                continue
            for dt in self.data_train:
                for aug in self.augmentation:
                    for de in self.data_eval:
                        for m in self.mode:
                            # NOTE: a CleftReport with the same arguments was
                            # requested here as well at some point; it is
                            # currently disabled.
                            yield PartnerReport(it, dt, aug, de, m,
                                                self.samples,
                                                self.data_eval)
示例#6
0
class Crop(luigi.Task):
    """Crop the prediction datasets of every sample to a fixed bounding box.

    For each sample the datasets "clefts", "pre_dist" and "post_dist" of the
    sample's ``.n5`` container are copied, restricted to the box given by the
    module-level ``offsets`` and ``shapes`` tables, into ``*_cropped``
    datasets of the same container. An empty ``crop.msg`` file marks
    completion.
    """
    it = luigi.IntParameter()
    path = luigi.Parameter()
    de = luigi.Parameter()
    samples = luigi.TupleParameter()
    data_eval = luigi.TupleParameter()
    resources = {"ram": 50}

    @property
    def priority(self):
        """Return ``1 / it`` for iterations divisible by 10000, else ``0``."""
        if int(self.it) % 10000 == 0:
            return 1.0 / int(self.it)
        else:
            return 0.0

    def requires(self):
        """Depend on the prediction for the same iteration and samples."""
        return Predict(self.it, self.path, self.samples, self.data_eval)

    def output(self):
        """Return the marker file next to the prediction, under ``de``."""
        return luigi.LocalTarget(
            os.path.join(os.path.dirname(self.input()[0].fn), self.de, "crop.msg")
        )

    def run(self):
        """Write the cropped datasets and then create the marker file."""
        progress = 0.0
        self.set_progress_percentage(progress)
        # The offset/shape tables are indexed by sample, then by this flag.
        aligned = "unaligned" not in self.de
        datasets_src = ["clefts", "pre_dist", "post_dist"]
        datasets_tgt = ["clefts_cropped", "pre_dist_cropped", "post_dist_cropped"]
        for s in self.samples:
            filename = os.path.join(
                os.path.dirname(self.input()[0].fn), self.de, s + ".n5"
            )
            off = offsets[s][aligned]
            sh = shapes[s][aligned]
            f = zarr.open(filename, mode="a")
            for dss, dst in zip(datasets_src, datasets_tgt):
                # A chunk must not be larger than the cropped volume itself.
                chunk_size = tuple(min(c, shi) for c, shi in zip(f[dss].chunks, sh))
                f.create_dataset(
                    name=dst,
                    shape=sh,
                    compressor=numcodecs.GZip(6),
                    dtype=f[dss].dtype,
                    chunks=chunk_size,
                )
                bb = tuple(slice(o, o + shi, None) for o, shi in zip(off, sh))
                f[dst][:] = f[dss][bb]
                # The offset attribute is stored in reversed axis order.
                f[dst].attrs["offset"] = off[::-1]

                progress += 100.0 / (len(self.samples) * len(datasets_src))
                try:
                    self.set_progress_percentage(progress)
                except Exception:
                    # Progress reporting is best effort, but a bare ``except``
                    # would also swallow KeyboardInterrupt and SystemExit.
                    pass

        # Create the empty marker file; the context manager closes it.
        with self.output().open("w"):
            pass
示例#7
0
        class A(luigi.Task):
            """Task that reports completion only if ``t`` equals ``expected``."""
            task_namespace = 'mynamespace'
            t = luigi.TupleParameter(default=((1, 2), (3, 4)))
            expected = luigi.TupleParameter()

            def complete(self):
                """Return True; raise ValueError when ``t`` and ``expected`` differ."""
                values_differ = self.t != self.expected
                if values_differ:
                    raise ValueError
                return True
示例#8
0
class SingleEvaluation(luigi.WrapperTask):
    """Wrapper task that requests the evaluation tasks for one iteration.

    For every entry of ``data_eval`` it requires one ``SplitModi`` task for
    the samples whose name contains ``"+"`` and three ``PartnerReport`` tasks
    ("groundtruth", "validation", "training") for the remaining samples.
    """
    iteration = luigi.IntParameter(default=186000)
    path = luigi.Parameter(
        default=
        "/nrs/saalfeld/heinrichl/synapses/pre_and_post/pre_and_post-v9.0/run01/"
    )
    data_eval = luigi.TupleParameter(default=("data2016-aligned",
                                              "data2016-unaligned"))
    samples = luigi.TupleParameter(default=("A", "B", "C", "A+", "B+", "C+"))

    def requires(self):
        """Yield the required tasks, grouped per evaluation dataset."""
        requested = self.samples
        # A group is only requested when at least one of its three known
        # sample names is present in ``samples``.
        has_test = any(name in requested for name in ("A+", "B+", "C+"))
        has_training = any(name in requested for name in ("A", "B", "C"))

        # The split itself is done on the "+" marker, not on the known names.
        test_samples = tuple(s for s in requested if "+" in s)
        training_samples = tuple(s for s in requested if "+" not in s)

        for de in self.data_eval:
            if has_test:
                yield SplitModi(
                    self.iteration,
                    self.path,
                    de,
                    "groundtruth",
                    test_samples,
                    self.data_eval,
                )
            if has_training:
                for mode in ("groundtruth", "validation", "training"):
                    yield PartnerReport(
                        self.iteration,
                        self.path,
                        de,
                        mode,
                        training_samples,
                        self.data_eval,
                    )
示例#9
0
class Threshold(luigi.Task):
    """Binarise the cropped cleft prediction of every sample.

    For each sample and each threshold in ``[127, 42]`` a ``uint8`` dataset
    ``clefts_cropped_thr<t>`` is written that is 1 where ``clefts_cropped``
    exceeds the threshold and 0 elsewhere. An empty ``thr.msg`` file marks
    completion.
    """
    it = luigi.IntParameter()
    dt = luigi.Parameter()
    aug = luigi.Parameter()
    de = luigi.Parameter()
    samples = luigi.TupleParameter()
    data_eval = luigi.TupleParameter()
    resources = {"ram": 50}

    @property
    def priority(self):
        """Return ``1 / it`` for iterations divisible by 10000, else ``0``."""
        if int(self.it) % 10000 == 0:
            return 1.0 / int(self.it)
        else:
            return 0.0

    def requires(self):
        """Depend on the crop of the same configuration."""
        return Crop(self.it, self.dt, self.aug, self.de, self.samples,
                    self.data_eval)

    def output(self):
        """Return the marker file located next to the crop marker."""
        return luigi.LocalTarget(
            os.path.join(os.path.dirname(self.input().fn), "thr.msg"))

    def run(self):
        """Write the thresholded datasets and then create the marker file."""
        thrs = [127, 42]
        progress = 0.0
        self.set_progress_percentage(progress)
        dataset_src = "clefts_cropped"
        dataset_tgt = "clefts_cropped_thr{0:}"
        for s in self.samples:
            filename = os.path.join(os.path.dirname(self.input().fn),
                                    s + ".n5")
            f = zarr.open(filename, mode="a")
            for t in thrs:
                target_name = dataset_tgt.format(t)
                f.empty(
                    name=target_name,
                    shape=f[dataset_src].shape,
                    compressor=numcodecs.GZip(6),
                    dtype="uint8",
                    chunks=f[dataset_src].chunks,
                )
                f[target_name][:] = (f[dataset_src][:] > t).astype(np.uint8)
                # Carry the offset of the source dataset over unchanged.
                f[target_name].attrs["offset"] = f[dataset_src].attrs["offset"]
            progress += 100.0 / len(self.samples)
            try:
                self.set_progress_percentage(progress)
            except Exception:
                # Progress reporting is best effort, but a bare ``except``
                # would also swallow KeyboardInterrupt and SystemExit.
                pass
        # Create the empty marker file; the context manager closes it.
        with self.output().open("w"):
            pass
示例#10
0
class Threshold(luigi.Task):
    """Binarise the cropped cleft prediction of every sample.

    For each sample and each threshold in ``[127, 42]`` a ``uint8`` dataset
    ``clefts_cropped_thr<t>`` is written that is 1 where ``clefts_cropped``
    exceeds the threshold and 0 elsewhere. An empty ``thr.msg`` file marks
    completion.
    """
    it = luigi.IntParameter()
    dt = luigi.Parameter()
    aug = luigi.Parameter()
    de = luigi.Parameter()
    samples = luigi.TupleParameter()
    data_eval = luigi.TupleParameter()
    resources = {'ram': 50}

    @property
    def priority(self):
        """Return ``1 / it`` for iterations divisible by 10000, else ``0``."""
        if int(self.it) % 10000 == 0:
            return 1. / int(self.it)
        else:
            return 0.

    def requires(self):
        """Depend on the crop of the same configuration."""
        return Crop(self.it, self.dt, self.aug, self.de, self.samples,
                    self.data_eval)

    def output(self):
        """Return the marker file located next to the crop marker."""
        return luigi.LocalTarget(
            os.path.join(os.path.dirname(self.input().fn), 'thr.msg'))

    def run(self):
        """Write the thresholded datasets and then create the marker file."""
        thrs = [127, 42]
        progress = 0.
        self.set_progress_percentage(progress)
        dataset_src = 'clefts_cropped'
        dataset_tgt = 'clefts_cropped_thr{0:}'
        for s in self.samples:
            filename = os.path.join(os.path.dirname(self.input().fn),
                                    s + '.n5')
            f = z5py.File(filename, use_zarr_format=False)
            for t in thrs:
                target_name = dataset_tgt.format(t)
                f.create_dataset(target_name,
                                 shape=f[dataset_src].shape,
                                 compression='gzip',
                                 dtype='uint8',
                                 chunks=f[dataset_src].chunks)
                f[target_name][:] = (f[dataset_src][:] > t).astype(np.uint8)
                # Carry the offset of the source dataset over unchanged.
                f[target_name].attrs['offset'] = f[dataset_src].attrs['offset']
            progress += 100. / len(self.samples)
            try:
                self.set_progress_percentage(progress)
            except Exception:
                # Progress reporting is best effort, but a bare ``except``
                # would also swallow KeyboardInterrupt and SystemExit.
                pass
        # Create the empty marker file; the context manager closes it.
        with self.output().open('w'):
            pass
示例#11
0
class PartnerReport(luigi.Task):
    """Score the synaptic partner predictions of every sample.

    For each sample the predicted annotations are compared with the ground
    truth through ``SynapticPartners.fscore``; the per-sample statistics are
    dumped to ``partners.<m>.json``.
    """
    it = luigi.IntParameter()
    dt = luigi.Parameter()
    aug = luigi.Parameter()
    de = luigi.Parameter()
    m = luigi.Parameter()

    samples = luigi.TupleParameter()
    data_eval = luigi.TupleParameter()
    resources = {'ram': 50}

    @property
    def priority(self):
        """Return ``1 / it`` for iterations divisible by 10000, else ``0``."""
        if int(self.it) % 10000 == 0:
            return 1. / int(self.it)
        else:
            return 0.

    def requires(self):
        """Depend on the ``SplitModi`` task of the same configuration."""
        return SplitModi(self.it, self.dt, self.aug, self.de, self.m,
                         self.samples, self.data_eval)

    def output(self):
        """Return the JSON report target located next to the input."""
        return luigi.LocalTarget(
            os.path.join(os.path.dirname(self.input().fn),
                         'partners.' + self.m + '.json'))

    def run(self):
        """Evaluate every sample and write the collected results as JSON."""
        progress = 0.
        results = dict()
        self.set_progress_percentage(progress)
        for s in self.samples:
            truth = os.path.join(
                '/groups/saalfeld/saalfeldlab/larissa/data/cremieval/',
                self.de, s + '.' + self.m + '.h5')
            test = os.path.join(os.path.dirname(self.input().fn),
                                s + '.' + self.m + '.h5')
            truth = CremiFile(truth, 'a')
            test = CremiFile(test, 'a')
            synaptic_partners_eval = SynapticPartners()
            # Read the predicted annotations once and reuse them for both the
            # diagnostic print and the scoring call.
            test_annotations = test.read_annotations()
            print(test_annotations)
            (fscore, precision, recall, fp, fn,
             filtered_matches) = synaptic_partners_eval.fscore(
                 test_annotations,
                 truth.read_annotations(),
                 truth.read_neuron_ids(),
                 all_stats=True)
            results[s] = {
                'fscore': fscore,
                'precision': precision,
                'recall': recall,
                'fp': fp,
                'fn': fn,
                'filtered_matches': filtered_matches,
            }
            progress += 100. / len(self.samples)
            try:
                self.set_progress_percentage(progress)
            except Exception:
                # Progress reporting is best effort, but a bare ``except``
                # would also swallow KeyboardInterrupt and SystemExit.
                pass
        with self.output().open('w') as done:
            json.dump(results, done)
示例#12
0
class FollowFilteredEdgelist(luigi.Task):
    '''Edge list restricted to edges that start at a user with a home location.

    Edges whose left-hand side is an unknown user are removed, so only data
    from users with an assigned home location remains.

    Args:
        --name  Name used in the save path so that the LocationUserList and
                UnknownList the result belongs to can be identified
        --month
    '''
    month = luigi.MonthParameter()
    name = luigi.Parameter()
    type = luigi.ChoiceParameter(choices=['followers', 'following'])
    sources = luigi.TupleParameter(default=('followers', 'following'))

    def requires(self):
        """Require the raw edge list and the remaining home-location list."""
        return {
            'edgelist': TwitterFollowRawEdgelist(month=self.month,
                                                 type=self.type),
            'hl': RemainedHomeLocation(name=self.name, month=self.month)
        }

    def output(self):
        """Return the gzipped TSV target named after the month and type."""
        return luigi.LocalTarget(
            os.path.join(
                NETWORK_DIR, 'filtered', self.name,
                self.month.strftime('%Y%m_{}.tsv.gz'.format(self.type))))

    def run(self):
        """Filter the raw edge list through ``edgefilter`` and gzip the result."""
        # Local import so this block does not depend on the file's import list.
        import shlex

        inputs = self.input()
        with self.output().temporary_path() as temp_output_path:
            # Quote every path: the pipeline goes through the shell, so
            # whitespace or metacharacters in a path would otherwise split or
            # alter the command line.
            # NOTE(review): without `pipefail` only the exit status of the
            # last pipeline stage is checked - confirm that is acceptable.
            cmd = 'zcat {} | python -m snlocest.scripts.edgefilter -i {} | gzip > {}'.format(
                shlex.quote(inputs['edgelist'].path),
                shlex.quote(inputs['hl'].path),
                shlex.quote(temp_output_path))
            run(cmd, shell=True, check=True)
示例#13
0
class LibRadarHeatmap(luigi.Task):
    """Render the LibRadar matrix of an application as a PDF heatmap."""
    apks = luigi.TupleParameter()
    pkg = luigi.Parameter()
    app_info_folder = cfg.info_app_folder

    def get_app_info(self):
        """Load and return the JSON content of the 'app_info' input."""
        with self.input()['app_info'].open() as data_file:
            return json.load(data_file)

    # requires application json
    def requires(self):
        """Require the LibRadar matrix and the application's info JSON file."""
        appinfo_file = os.path.join(self.app_info_folder, self.pkg,
                                    self.pkg + '.json')
        return {
            'matrix': LibRadarMatrix(pkg=self.pkg, apks=self.apks),
            'app_info': ExternalFile(file_name=appinfo_file)
        }

    # output is the heatmap
    def output(self):
        """Return the target of the PDF named after the package."""
        output_file = os.path.join(cfg.libradar_heatmap_folder,
                                   self.pkg + ".pdf")
        return ExternalFileTarget(output_file)

    # creates the heatmap of permission use and saves it to a file
    def create_heatmap(self, data, row_labels, col_labels):
        """Plot ``data`` as a heatmap and save it to the output path as PDF.

        Args:
            data: two-dimensional array of values to plot.
            row_labels: labels for the rows of ``data``.
            col_labels: labels for the columns of ``data``.
        """
        pdata = pd.DataFrame(data, index=row_labels, columns=col_labels)
        pdata.index.name = "libraries"
        pdata.columns.name = "Versions"

        # TODO put this in all heatmap creation. refactor code
        # Rows are only clustered when there is more than one of them.
        row_cluster = data.shape[0] > 1

        # get app_info from external file
        app_info = self.get_app_info()

        col_colors = heatmaps.get_col_colors(col_labels, app_info)

        vmax = pdata.values.max()
        splot = heatmaps.plot_heatmap(pdata,
                                      vmax=vmax,
                                      col_colors=col_colors,
                                      row_cluster=row_cluster,
                                      annot=False)

        output_path = self.output().path
        # exist_ok avoids the check-then-create race when several tasks try
        # to create the shared heatmap folder at the same time.
        os.makedirs(os.path.dirname(output_path), exist_ok=True)

        splot.savefig(os.path.abspath(output_path), format='pdf')

    # creates the heatmap
    def run(self):
        """Read the matrix JSON and create the heatmap from it."""
        # read app matrix from json
        with self.input()['matrix'].open() as data_file:
            data = json.load(data_file)

        # get matrix and create the heatmap
        matrix = numpy.array(data['m'])
        self.create_heatmap(matrix, data['yl'], data['xl'])
示例#14
0
class MakeItFolder(luigi.ExternalTask):
    """Create the per-iteration evaluation folders next to the checkpoint."""
    it = luigi.IntParameter()
    dt = luigi.IntParameter()
    aug = luigi.Parameter()
    data_eval = luigi.TupleParameter()

    @property
    def priority(self):
        """Use the iteration number as the priority."""
        return self.it

    def requires(self):
        """Depend on the checkpoint check for the same configuration."""
        return CheckCheckpoint(self.it, self.dt, self.aug)

    def output(self):
        """Return the folder of the last ``data_eval`` entry as the target."""
        base = os.path.dirname(self.input()[0].fn)
        return luigi.LocalTarget(
            os.path.join(base, "evaluation", str(self.it), self.data_eval[-1])
        )

    def run(self):
        """Create ``<base>/evaluation/<it>/<de>`` for every ``data_eval`` entry."""
        # make the folders
        base = os.path.dirname(self.input()[0].fn)
        for de in self.data_eval:
            # exist_ok avoids the check-then-create race when another worker
            # creates the same folder concurrently.
            os.makedirs(
                os.path.join(base, "evaluation", str(self.it), de), exist_ok=True
            )
示例#15
0
class DummyS3CopyJSONToTableBase(luigi.contrib.redshift.S3CopyJSONToTable):
    """``S3CopyJSONToTable`` subclass filled with dummy connection settings."""
    # Class attributes taken from `DummyPostgresImporter` in
    # `../postgres_test.py`.
    aws_access_key_id = AWS_ACCESS_KEY
    aws_secret_access_key = AWS_SECRET_KEY

    host = 'dummy_host'
    database = 'dummy_database'
    user = '******'
    password = '******'
    table = luigi.Parameter(default='dummy_table')
    columns = luigi.TupleParameter(default=(
        ('some_text', 'varchar(255)'),
        ('some_int', 'int'),
    ))

    # Copy and prune options are all left empty.
    copy_options = ''
    prune_table = ''
    prune_column = ''
    prune_date = ''

    jsonpath = ''
    copy_json_options = ''

    def s3_load_path(self):
        """Return the S3 URL built from the module-level BUCKET and KEY."""
        location = (BUCKET, KEY)
        return 's3://%s/%s' % location
示例#16
0
class WideRecommender(ClassifierWithTransferLearningKerasModelTraining):
    """Model with one wide dense layer followed by a sigmoid output unit."""
    input_shape: Tuple[int, int] = luigi.TupleParameter(default=(100, ))
    batch_size: int = luigi.IntParameter(default=10)
    learning_rate = luigi.FloatParameter(default=1e-5)
    dense_layers: List[int] = luigi.ListParameter(default=[512, 512])
    dropout: float = luigi.FloatParameter(default=None)
    activation_function: str = luigi.ChoiceParameter(
        choices=KERAS_ACTIVATION_FUNCTIONS.keys(), default="relu")
    kernel_initializer: str = luigi.ChoiceParameter(
        choices=KERAS_WEIGHT_INIT.keys(), default="glorot_uniform")

    def create_base_model(self) -> Model:
        """Build the model: input -> dense (input width) -> sigmoid unit."""
        inputs = Input(shape=self.input_shape, name='wide_inp')

        # The wide layer has as many units as the first input dimension.
        wide_layer = Dense(self.input_shape[0],
                           activation=self.activation_function,
                           kernel_initializer=self.kernel_initializer,
                           name='wide_mlp')
        wide_features = wide_layer(inputs)

        output_layer = Dense(1,
                             activation='sigmoid',
                             kernel_initializer=self.kernel_initializer)
        prediction = output_layer(wide_features)

        return Model(inputs, prediction, name='Wide')

    def create_model_with(self, base_model: Model) -> Model:
        """Return ``base_model`` unchanged."""
        return base_model
示例#17
0
class ParseElasticApi(luigi.Task):
    """Load the data of every version of a report and save it as CSV rows."""

    name = luigi.Parameter(default='')
    version = luigi.Parameter(default='')
    versions = luigi.TupleParameter(default='')
    rep_name = luigi.Parameter(default='')
    chunk_size = luigi.IntParameter(default=CHUNK_SIZE)
    api_key = luigi.Parameter(default=DGOV_API_KEY)
    struct = luigi.Parameter(default=None)
    columns_filter = luigi.DictParameter(default=None)

    def output(self):
        """Return the CSV target inside ``TMP_DIR`` named after ``name``."""
        csv_path = build_fpath(TMP_DIR, self.name, 'csv')
        return luigi.LocalTarget(csv_path)

    def run(self):
        """Fetch each version's data and pass it to ``save_csvrows``."""
        query = '{' + QUERY_TMPL.format(0, self.chunk_size) + '}'
        rep_url = build_url_for_report_page(self.rep_name)
        # Versions given as a parameter win; otherwise they are looked up
        # from the report page.
        versions = self.versions or load_versions(rep_url)
        for vs in versions:
            data_url = build_url_for_data_page(self.rep_name, self.api_key,
                                               version=vs, query=query)
            rows = load_data(data_url, self.struct, self.columns_filter)
            save_csvrows(self.output().path, rows)
示例#18
0
class RemainedHomeLocation(luigi.Task):
    '''Home-location data restricted to known, crawled users.

    Users that became unknown are removed from the generated home-location
    data (LocationUserList), and the result is intersected with the user list
    for which the social network was collected before it is saved.

    Args:
        --homelocation-path  Path to the home-location data file
    '''
    name = luigi.Parameter()
    month = luigi.MonthParameter()
    sources = luigi.TupleParameter(default=('followers', 'following'))
    homelocation_path = luigi.Parameter()

    def requires(self):
        """Require the unknown list, the home-location list and the seed list."""
        return {
            'unknown': UnknownList(month=self.month, sources=self.sources),
            'userlist': LocationUserList(path=self.homelocation_path),
            'seed': SeedUserList(month=self.month)
        }

    def output(self):
        """Return the target that keeps the base name of the user list file."""
        return luigi.LocalTarget(
            os.path.join('data/datasets', self.name, 'groundtruth',
                         os.path.basename(self.input()['userlist'].path)))

    def run(self):
        """Pipe the user list through both ``edgefilter`` passes."""
        # Local import so this block does not depend on the file's import list.
        import shlex

        inputs = self.input()
        # First pass uses -e with the unknown list, second pass uses -i with
        # the seed list.
        cmd = 'cat {} | python -m snlocest.scripts.edgefilter -e {} | python -m snlocest.scripts.edgefilter -i {} > {}'
        with self.output().temporary_path() as temp_output_path:
            # Quote every path: the pipeline goes through the shell, so
            # whitespace or metacharacters in a path would otherwise split or
            # alter the command line.
            run(cmd.format(shlex.quote(inputs['userlist'].path),
                           shlex.quote(inputs['unknown'].path),
                           shlex.quote(inputs['seed'].path),
                           shlex.quote(temp_output_path)),
                shell=True,
                check=True)
示例#19
0
class MLPClassifier(ClassifierWithTransferLearningKerasModelTraining):
    """Multi-layer perceptron with a single sigmoid output unit."""
    input_shape: Tuple[int, int] = luigi.TupleParameter(default=(100, ))
    batch_size: int = luigi.IntParameter(default=10)
    learning_rate = luigi.FloatParameter(default=1e-5)
    dense_layers: List[int] = luigi.ListParameter(default=[512, 512])
    dropout: float = luigi.FloatParameter(default=None)
    activation_function: str = luigi.ChoiceParameter(
        choices=KERAS_ACTIVATION_FUNCTIONS.keys(), default="relu")
    kernel_initializer: str = luigi.ChoiceParameter(
        choices=KERAS_WEIGHT_INIT.keys(), default="glorot_uniform")

    def create_base_model(self) -> Model:
        """Stack one dense layer per ``dense_layers`` entry, then the output."""
        inputs = Input(shape=self.input_shape)
        layer_sizes = self.dense_layers

        hidden = Dense(layer_sizes[0],
                       activation=self.activation_function,
                       kernel_initializer=self.kernel_initializer)(inputs)

        # Dropout, when configured, follows every dense layer except the
        # first one.
        for units in layer_sizes[1:]:
            hidden = Dense(units,
                           activation=self.activation_function,
                           kernel_initializer=self.kernel_initializer)(hidden)
            if self.dropout:
                hidden = Dropout(self.dropout)(hidden)

        prediction = Dense(1, activation='sigmoid')(hidden)
        return Model(inputs, prediction, name='BaseMLP')

    def create_model_with(self, base_model: Model) -> Model:
        """Return ``base_model`` unchanged."""
        return base_model
示例#20
0
class CleftReport(luigi.Task):
    """Score the thresholded cleft prediction of every sample.

    For each sample the ``clefts_cropped_thr127`` prediction is compared with
    the ground-truth clefts, using the negated ``<m>`` mask, and the false
    positive/negative counts and distances are dumped to ``cleft.<m>.json``.
    """
    it = luigi.IntParameter()
    dt = luigi.Parameter()
    aug = luigi.Parameter()
    de = luigi.Parameter()
    m = luigi.Parameter()
    samples = luigi.TupleParameter()
    data_eval = luigi.TupleParameter()
    resources = {'ram': 10}

    @property
    def priority(self):
        """Return ``1 / it`` for iterations divisible by 10000, else ``0``."""
        if int(self.it) % 10000 == 0:
            return 1. / int(self.it)
        else:
            return 0.

    def requires(self):
        """Depend on the thresholding of the same configuration."""
        return Threshold(self.it, self.dt, self.aug, self.de, self.samples,
                         self.data_eval)

    def output(self):
        """Return the JSON report target located next to the input."""
        cleftrep = os.path.join(os.path.dirname(self.input().fn),
                                'cleft.' + self.m + '.json')
        return luigi.LocalTarget(cleftrep)

    def run(self):
        """Evaluate every sample and write the collected results as JSON."""
        progress = 0.
        self.set_progress_percentage(progress)
        results = dict()
        thr = 127
        for s in self.samples:
            testfile = os.path.join(os.path.dirname(self.input().fn),
                                    s + '.n5')
            truthfile = os.path.join(
                '/groups/saalfeld/saalfeldlab/larissa/data/cremieval/',
                self.de, s + '.n5')
            test = np.array(
                z5py.File(testfile,
                          use_zarr_format=False)['clefts_cropped_thr' +
                                                 str(thr)][:])
            # Open the ground-truth container once for both of its datasets.
            truth_container = z5py.File(truthfile, use_zarr_format=False)
            truth = np.array(
                truth_container['volumes/labels/clefts_cropped'][:])
            mask = np.array(
                truth_container['volumes/masks/' + self.m + '_cropped'][:])
            clefts_evaluation = Clefts(test, truth, np.logical_not(mask))
            results[s] = {
                'false negatives count':
                clefts_evaluation.count_false_negatives(),
                'false positives count':
                clefts_evaluation.count_false_positives(),
                'false negative distance':
                clefts_evaluation.acc_false_negatives(),
                'false positive distance':
                clefts_evaluation.acc_false_positives(),
            }
            progress += 100. / len(self.samples)
            try:
                self.set_progress_percentage(progress)
            except Exception:
                # Progress reporting is best effort, but a bare ``except``
                # would also swallow KeyboardInterrupt and SystemExit.
                pass
        with self.output().open('w') as done:
            json.dump(results, done)
示例#21
0
class LinkwaglOutputs(luigi.Task):

    """
    Link all the multifile outputs from wagl into a single file.

    Every top-level group of every ``.h5`` file found under ``work_root`` is
    exposed in the output file through an external link, and the output
    file's ``level1_uri`` attribute is set to ``level1``.
    """

    level1 = luigi.Parameter()
    work_root = luigi.Parameter()
    granule = luigi.OptionalParameter(default="")
    acq_parser_hint = luigi.OptionalParameter(default="")
    workflow = luigi.EnumParameter(enum=Workflow)
    vertices = luigi.TupleParameter(default=(5, 5))
    pixel_quality = luigi.BoolParameter()
    method = luigi.EnumParameter(enum=Method, default=Method.SHEAR)
    dsm_fname = luigi.Parameter(significant=False)
    buffer_distance = luigi.FloatParameter(default=8000, significant=False)

    def requires(self):
        """Yield one ``DataStandardisation`` task per supported group."""
        container = acquisitions(self.level1, self.acq_parser_hint)
        for group in container.supported_groups:
            kwargs = {
                "level1": self.level1,
                "work_root": self.work_root,
                "granule": self.granule,
                "group": group,
                "workflow": self.workflow,
                "vertices": self.vertices,
                "pixel_quality": self.pixel_quality,
                "method": self.method,
                "dsm_fname": self.dsm_fname,
                "buffer_distance": self.buffer_distance,
            }
            yield DataStandardisation(**kwargs)

    def output(self):
        """Return ``<granule>.h5`` in the parent directory of ``work_root``."""
        out_fname = pjoin(dirname(self.work_root), "{}.h5".format(self.granule))
        return luigi.LocalTarget(out_fname)

    def run(self):
        """Walk ``work_root`` and link every HDF5 group into the output file."""
        with self.output().temporary_path() as out_fname:
            for root, _, files in os.walk(self.work_root):
                # skip any private files
                # ``startswith`` also copes with an empty basename, which
                # occurs when ``work_root`` ends with a path separator;
                # indexing ``[0]`` would raise IndexError there.
                # Only the files directly inside such a directory are
                # skipped; the walk still descends into its subdirectories.
                if basename(root).startswith("_"):
                    continue

                for file_ in files:
                    if splitext(file_)[1] == ".h5":
                        fname = pjoin(root, file_)
                        # Name of the directory holding the file, relative to
                        # ``work_root``.
                        grp_name = basename(dirname(fname.replace(self.work_root, "")))

                        with h5py.File(fname, "r") as fid:
                            groups = list(fid)

                        for pth in groups:
                            new_path = ppjoin(self.granule, grp_name, pth)
                            create_external_link(fname, pth, out_fname, new_path)

            with h5py.File(out_fname, "a") as fid:
                fid.attrs["level1_uri"] = self.level1
示例#22
0
class AncillaryData(luigi.Task):

    """Collect the ancillary data for a granule into ``ancillary.h5``."""

    level1 = luigi.Parameter()
    work_root = luigi.Parameter(significant=False)
    granule = luigi.OptionalParameter(default="")
    vertices = luigi.TupleParameter()
    workflow = luigi.EnumParameter(enum=Workflow)
    acq_parser_hint = luigi.OptionalParameter(default="")
    aerosol = luigi.DictParameter(default={"user": 0.05}, significant=False)
    brdf = luigi.DictParameter()
    ozone_path = luigi.Parameter(significant=False)
    water_vapour = luigi.DictParameter(default={"user": 1.5}, significant=False)
    dem_path = luigi.Parameter(significant=False)
    ecmwf_path = luigi.Parameter(significant=False)
    invariant_height_fname = luigi.Parameter(significant=False)
    compression = luigi.EnumParameter(
        enum=H5CompressionFilter,
        default=H5CompressionFilter.LZF,
        significant=False,
    )
    filter_opts = luigi.DictParameter(default=None, significant=False)

    def requires(self):
        """Require the satellite/solar grids task for the first supported group."""
        container = acquisitions(self.level1, self.acq_parser_hint)
        first_group = container.supported_groups[0]
        return CalculateSatelliteAndSolarGrids(
            self.level1, self.work_root, self.granule, first_group
        )

    def output(self):
        """Return the local target ``ancillary.h5`` inside ``work_root``."""
        target_path = pjoin(self.work_root, "ancillary.h5")
        return luigi.LocalTarget(target_path)

    def run(self):
        """Gather the ancillary inputs and pass them to ``_collect_ancillary``."""
        container = acquisitions(self.level1, self.acq_parser_hint)
        grn = container.get_granule(granule=self.granule, container=True)

        nbar_paths = {
            "aerosol_dict": self.aerosol,
            "water_vapour_dict": self.water_vapour,
            "ozone_path": self.ozone_path,
            "dem_path": self.dem_path,
            "brdf_dict": self.brdf,
        }

        # The ECMWF path is only forwarded for the STANDARD and SBT workflows;
        # every other workflow passes None.
        needs_sbt = self.workflow in (Workflow.STANDARD, Workflow.SBT)
        sbt_path = self.ecmwf_path if needs_sbt else None

        with self.output().temporary_path() as out_fname:
            _collect_ancillary(
                grn,
                self.input().path,
                nbar_paths,
                sbt_path,
                self.invariant_height_fname,
                self.vertices,
                out_fname,
                self.compression,
                self.filter_opts,
            )
示例#23
0
class AllEvaluations(luigi.WrapperTask):
    """Wrapper task requiring a PartnerReport for every parameter combination.

    Iterations run from ``iteration_step`` up to and including
    ``up_to_iteration`` in increments of ``iteration_step``; only iterations
    above 20000 produce requirements.
    """
    up_to_iteration = luigi.IntParameter(default=200000)
    iteration_step = luigi.IntParameter(default=10000, significant=False)
    data_train = luigi.TupleParameter(default=('data2016-aligned', 'data2016-unaligned'))
    data_eval = luigi.TupleParameter(default=('data2017-aligned', 'data2017-unaligned'))
    augmentation = luigi.TupleParameter(default=('deluxe', 'classic', 'lite'))
    mode = luigi.TupleParameter(default=('validation', 'training'))
    samples = luigi.TupleParameter(default=('A', 'B', 'C'))

    def requires(self):
        """Yield PartnerReport tasks in iteration/train/augmentation/eval/mode order."""
        step = self.iteration_step
        stop = self.up_to_iteration + step
        for it in range(step, stop, step):
            # Nothing is requested for iterations of 20000 or below.
            if it <= 20000:
                continue
            for dt in self.data_train:
                for aug in self.augmentation:
                    for de in self.data_eval:
                        for m in self.mode:
                            #yield CleftReport(it, dt, aug, de, m, self.samples, self.data_eval)
                            yield PartnerReport(it, dt, aug, de, m, self.samples, self.data_eval)
示例#24
0
class DataStandardisation(luigi.Task):

    """
    Runs the standardised product workflow by calling ``card4l``.
    """
    level1 = luigi.Parameter()
    outdir = luigi.Parameter()
    granule = luigi.OptionalParameter(default='')
    workflow = luigi.EnumParameter(enum=Workflow, default=Workflow.STANDARD)
    vertices = luigi.TupleParameter(default=(5, 5))
    method = luigi.EnumParameter(enum=Method, default=Method.SHEAR)
    pixel_quality = luigi.BoolParameter()
    land_sea_path = luigi.Parameter()
    aerosol = luigi.DictParameter(default={'user': 0.05})
    brdf = luigi.DictParameter()
    ozone_path = luigi.Parameter(significant=False)
    water_vapour = luigi.DictParameter(
        default={'user': 1.5},
        significant=False,
    )
    dem_path = luigi.Parameter(significant=False)
    ecmwf_path = luigi.Parameter(significant=False)
    invariant_height_fname = luigi.Parameter(significant=False)
    dsm_fname = luigi.Parameter(significant=False)
    modtran_exe = luigi.Parameter(significant=False)
    tle_path = luigi.Parameter(significant=False)
    rori = luigi.FloatParameter(default=0.52, significant=False)
    compression = luigi.EnumParameter(
        enum=H5CompressionFilter,
        default=H5CompressionFilter.LZF,
        significant=False,
    )
    filter_opts = luigi.DictParameter(default=None, significant=False)
    acq_parser_hint = luigi.OptionalParameter(default='')
    buffer_distance = luigi.FloatParameter(default=8000, significant=False)
    h5_driver = luigi.OptionalParameter(default='', significant=False)
    normalized_solar_zenith = luigi.FloatParameter(default=45.0)

    def output(self):
        """Return the target ``<label>.wagl.h5`` inside ``outdir``.

        The label is the granule when one is set, otherwise the basename of
        the level1 path.
        """
        label = self.granule or basename(self.level1)
        out_fname = '{label}.wagl.h5'.format(label=label)
        return luigi.LocalTarget(pjoin(self.outdir, out_fname))

    def run(self):
        """Call ``card4l`` with this task's parameters, writing to a temporary path."""
        # The ECMWF path is only forwarded for the STANDARD and SBT workflows.
        uses_ecmwf = self.workflow in (Workflow.STANDARD, Workflow.SBT)
        ecmwf_path = self.ecmwf_path if uses_ecmwf else None

        with self.output().temporary_path() as out_fname:
            card4l(
                self.level1,
                self.granule,
                self.workflow,
                self.vertices,
                self.method,
                self.pixel_quality,
                self.land_sea_path,
                self.tle_path,
                self.aerosol,
                self.brdf,
                self.ozone_path,
                self.water_vapour,
                self.dem_path,
                self.dsm_fname,
                self.invariant_height_fname,
                self.modtran_exe,
                out_fname,
                ecmwf_path,
                self.rori,
                self.buffer_distance,
                self.compression,
                self.filter_opts,
                self.h5_driver,
                self.acq_parser_hint,
                self.normalized_solar_zenith,
            )
示例#25
0
class ParseCompanies(luigi.Task):
    """Parse each input file into rows and save them as CSV.

    NOTE(review): ``self.name`` is read in ``output`` but is not declared in
    this class, and no ``requires`` is defined here -- presumably both are
    supplied by a subclass or mixin; confirm.
    """

    sheets = luigi.TupleParameter(default=None)
    skiptop = luigi.TupleParameter(default=None)

    def output(self):
        """Return the local CSV target built from ``TMP_DIR`` and ``self.name``."""
        return luigi.LocalTarget(build_fpath(TMP_DIR, self.name, 'csv'))

    def run(self):
        """Parse every input target, save its rows and report progress."""
        # Resolve inputs and the output path once: luigi re-evaluates
        # ``requires()`` on every ``input()`` call, so doing this inside the
        # loop repeats the same work for each file.
        targets = self.input()
        total = len(targets)
        output_path = self.output().path

        for done, target in enumerate(targets, start=1):
            self.set_status_message('Parsing {}'.format(target.path))
            rows = parse(target.path,
                         Row,
                         skiprows=self.skiptop,
                         sheets=self.sheets)
            save_csvrows(output_path, [attr.astuple(r) for r in rows])

            percent = round(done * 100 / total)
            self.set_progress_percentage(percent)
示例#26
0
class ParseBigElasticApi(BigDataToCsv):
    """Parameter declarations for a BigDataToCsv task.

    NOTE(review): this class only declares parameters; all behaviour comes
    from BigDataToCsv, which is not visible here. Presumably the parameters
    describe a paged API source -- confirm against the base class.
    """

    name = luigi.Parameter(default='')
    version = luigi.Parameter(default='')
    # NOTE(review): the default is an empty string, not an empty tuple;
    # confirm this is intended for a TupleParameter.
    versions = luigi.TupleParameter(default='')
    rep_name = luigi.Parameter(default='')
    # Defaults to the module-level CHUNK_SIZE constant.
    chunk_size = luigi.IntParameter(default=CHUNK_SIZE)
    # Defaults to the module-level DGOV_API_KEY constant.
    api_key = luigi.Parameter(default=DGOV_API_KEY)
    struct = luigi.Parameter(default=None)
    columns_filter = luigi.DictParameter(default=None)
示例#27
0
class InterpolateCoefficients(luigi.Task):
    """
    Issues InterpolateCoefficient tasks.

    This acts as a helper task: it requires one InterpolateCoefficient task
    per (band name, coefficient) pair and links the results from each of
    them into a single HDF5 file.
    """

    vertices = luigi.TupleParameter()
    workflow = luigi.EnumParameter(enum=Workflow)
    method = luigi.EnumParameter(enum=Method, default=Method.SHEAR)

    def requires(self):
        """Return a dict of tasks keyed by ``(band_name, coefficient)``."""
        container = acquisitions(self.level1, self.acq_parser_hint)
        acqs = container.get_acquisitions(group=self.group, granule=self.granule)

        # Split the acquisitions by band type.
        reflective = [a for a in acqs if a.band_type == BandType.REFLECTIVE]
        thermal = [a for a in acqs if a.band_type == BandType.THERMAL]

        tasks = {}
        for coefficient in self.workflow.atmos_coefficients:
            # Coefficients listed for the NBAR workflow use the reflective
            # bands; all others use the thermal bands.
            is_nbar = coefficient in Workflow.NBAR.atmos_coefficients
            for acq in (reflective if is_nbar else thermal):
                key = (acq.band_name, coefficient)
                tasks[key] = InterpolateCoefficient(
                    level1=self.level1,
                    work_root=self.work_root,
                    granule=self.granule,
                    group=self.group,
                    band_name=acq.band_name,
                    coefficient=coefficient,
                    workflow=self.workflow,
                    vertices=self.vertices,
                    method=self.method,
                )
        return tasks

    def output(self):
        """Return the target ``interpolated-coefficients.h5`` in the group folder."""
        group_dir = pjoin(self.work_root, self.group)
        return luigi.LocalTarget(pjoin(group_dir, 'interpolated-coefficients.h5'))

    def run(self):
        """Link every required task's output file into the output file."""
        fnames = {key: target.path for key, target in self.input().items()}

        with self.output().temporary_path() as out_fname:
            link_interpolated_data(fnames, out_fname)
示例#28
0
class CommonDomains(luigi.Task):
    """Compare, per release, the domains in ``domains.txt`` with the parsed URLs.

    For every input JSON file the task writes one entry, keyed by release
    version, that lists:

    * ``domains`` -- domain lines that occur in none of the URLs,
    * ``both``    -- domain lines that occur as a substring of some URL,
    * ``urls``    -- URLs that were not the first match of any domain line,

    together with the length of each list.
    """
    apks = luigi.TupleParameter()
    pkg = luigi.Parameter()
    domains_folder = cfg.dynamic_bro_analysis_folder

    def get_domains_file_path(self, version, date):
        """Return the path of ``domains.txt`` for the given version and date."""
        return os.path.join(self.domains_folder,
                            self.pkg,
                            self.pkg + '_' + version + '_' + date,
                            'domains.txt')

    # requires json of single releases
    def requires(self):
        """Require one StringoidParse task per apk."""
        return [StringoidParse(file_name=apk) for apk in self.apks]

    # output is the json file with aggregated info of the app
    def output(self):
        """Return the target ``<pkg>_commondomains.json``."""
        output_file = os.path.join(cfg.stringoid_commondomains_folder,
                                   self.pkg + "_commondomains.json")
        return ExternalFileTarget(output_file)

    def run(self):
        """Build the per-version comparison and dump it as sorted JSON."""
        targets = self.input()
        commondomains = {}

        for target in targets:
            with open(target.path) as url_file:
                urls = json.load(url_file)

            # Version and date are the last two "_"-separated pieces of the
            # input path, with the ".json" suffix dropped from the date.
            name_parts = target.path.split("_")
            version = name_parts[-2]
            date = name_parts[-1].replace(".json", "")
            domains_path = self.get_domains_file_path(version, date)

            domains_list = []
            both = []
            # URLs already claimed by a domain. A set is used so that a URL
            # matched by several domain lines is dropped once instead of
            # raising ValueError on a second ``list.remove``.
            matched_urls = set()

            with open(domains_path) as domain_file:
                for line in domain_file:
                    domain = line.strip()
                    # A blank line would be a substring of every URL and be
                    # reported as a match, so it is ignored.
                    if not domain:
                        continue

                    # First URL, in file order, that contains the domain.
                    hit = next((url for url in urls if domain in url), None)
                    if hit is None:
                        domains_list.append(domain)
                    else:
                        both.append(domain)
                        matched_urls.add(hit)

            # Build a real list: ``dict.keys()`` is a view on Python 3, which
            # has no ``remove`` and cannot be serialised by ``json.dump``.
            urls_list = [url for url in urls if url not in matched_urls]

            commondomains[version] = {
                "domains": domains_list,
                "urls": urls_list,
                "both": both,
                "domains_number": len(domains_list),
                "urls_number": len(urls_list),
                "both_number": len(both),
            }

        for target in targets:
            target.cleanup()

        with self.output().open('w') as f:
            json.dump(commondomains, f, sort_keys=True)
示例#29
0
文件: pipeline.py 项目: FDA/openfda
class LoadJSON(index_util.LoadJSONBase):
    """Configure ``index_util.LoadJSONBase`` for the ``devicerecall`` index.

    NOTE(review): how the class attributes below are consumed is defined by
    LoadJSONBase, which is not visible here; confirm against that base class.
    """
    batch = luigi.TupleParameter()
    last_update_date = luigi.Parameter()
    index_name = 'devicerecall'
    mapping_file = './schemas/device_recall_mapping.json'
    use_checksum = True
    optimize_index = False
    docid_key = 'product_res_number'

    def _data(self):
        """Return the AnnotateWeeklyBatch task for this task's batch."""
        return AnnotateWeeklyBatch(self.batch)
示例#30
0
class ClosedDataset(luigi.WrapperTask):
    '''Build the closed_[name] dataset.'''
    name = luigi.Parameter()
    sources = luigi.TupleParameter(
        default=('linked', 'mutual', 'followee', 'follower'))

    def requires(self):
        '''Require the ground-truth copy followed by one network task per source.'''
        dst_name = 'closed_' + self.name
        tasks = [CopyGroundtruth(src_name=self.name, dst_name=dst_name)]
        tasks.extend(
            ClosedNetwork(src_name=self.name, dst_name=dst_name, source=source)
            for source in self.sources
        )
        return tasks