Пример #1
0
    def output(self):
        """Return one ``<model>_pca.csv`` target per entry in ``self.models``.

        Each file name is resolved through ``paths.output`` with the ``'pca'``
        argument and wrapped in a ``luigi.LocalTarget``. The returned list
        follows the order of ``self.models``.
        """
        file_names = ["{}_pca.csv".format(model) for model in self.models]
        resolved = [paths.output(name, 'pca') for name in file_names]
        return [luigi.LocalTarget(location) for location in resolved]
Пример #2
0
    def run(self):
        """Write a 3-component PCA projection of each model's best training data.

        Sample metadata is read from the first input and the metrics table
        from the first target of the second input. For every model in
        ``self.models``:

        1. the metrics row with the highest ``test_f1_score`` among rows whose
           ``embedding`` equals the model is selected;
        2. the training data named by that row's ``training_data_name`` is
           loaded from the path ``paths.output(name, 'training_data')``;
        3. the ``self.target`` column is dropped and the remaining columns
           are reduced to 3 principal components;
        4. the components are joined with the ``env_material`` metadata column
           on the index and written as CSV to the i-th output target.

        Raises:
            ValueError: if the metrics table contains no rows for a model.
        """
        # ``.fn`` is a deprecated alias of ``.path`` on luigi local targets;
        # use ``.path`` everywhere, as the output handling below already does.
        metadata = pd.read_csv(self.input()[0].path, index_col=0)
        # Sample ids are cast to str before becoming the index -- presumably
        # so they match the training data index in the merge below (confirm).
        metadata['sample_id'] = metadata['sample_id'].astype(str)
        metadata = metadata.set_index('sample_id')
        metadata = metadata[['env_material']]

        metrics = pd.read_csv(self.input()[1][0].path)

        # Build the output targets once rather than on every iteration.
        targets = self.output()

        for i, model in enumerate(self.models):
            df = metrics[metrics['embedding'] == model]
            if df.empty:
                # idxmax() on an empty selection fails with an opaque message;
                # report which model is missing instead.
                raise ValueError(
                    "No metrics rows found for embedding '{}'".format(model))

            best = df.loc[df['test_f1_score'].idxmax()]
            best_training_data = best['training_data_name']
            training_data_path = paths.output(best_training_data,
                                              'training_data')

            # NOTE: unpickling executes arbitrary code; this file must only
            # ever come from a trusted pipeline step.
            training_data = pd.read_pickle(training_data_path)

            print('Training data shape', training_data.shape)

            idx = training_data.index
            X = training_data.drop(self.target, axis=1)

            pca = PCA(n_components=3)
            transformed = pd.DataFrame(pca.fit_transform(X), index=idx)
            # merge() defaults to an inner join, so samples without a
            # metadata row are dropped from the written projection.
            transformed = transformed.merge(metadata,
                                            left_index=True,
                                            right_index=True)
            print("{} Explained Variance".format(model),
                  pca.explained_variance_ratio_)

            transformed.to_csv(targets[i].path)
Пример #3
0
    def output(self):
        """Return the two CSV targets produced by this task, in fixed order.

        The file names ``speciesid_to_tax.csv`` and
        ``taxonomy_97_transitive_closure.csv`` are resolved through
        ``paths.output`` and wrapped in ``luigi.LocalTarget`` objects.
        """
        file_names = (
            'speciesid_to_tax.csv',
            'taxonomy_97_transitive_closure.csv',
        )
        resolved = list(map(paths.output, file_names))
        return list(map(luigi.LocalTarget, resolved))
    def output(self):
        """Return the model, metrics and training-data targets, in that order.

        The model and metrics file names are derived from ``self.name()``, and
        the training-data file name comes from ``self.training_data_name()``.
        Each name is resolved through ``paths.output`` with its own second
        argument (``'model'``, ``'metrics'``, ``'training_data'``).
        """
        model_file = "{}_lr_model.pkl".format(self.name())
        metrics_file = "{}_lr_model_metrics.csv".format(self.name())
        training_file = self.training_data_name()

        resolved = [
            paths.output(model_file, 'model'),
            paths.output(metrics_file, 'metrics'),
            paths.output(training_file, 'training_data'),
        ]
        return [luigi.LocalTarget(location) for location in resolved]
Пример #5
0
 def output(self):
     """Return the local target for ``biom.pkl``, resolved by ``paths.output``."""
     return luigi.LocalTarget(paths.output("biom.pkl"))
Пример #6
0
 def output(self):
     """Return the local target for ``alpha_diversity.pkl``.

     The file name is resolved through ``paths.output``.
     """
     resolved = paths.output("alpha_diversity.pkl")
     return luigi.LocalTarget(resolved)
 def output(self):
     """Return the local target for ``self.filename``, resolved by ``paths.output``."""
     return luigi.LocalTarget(paths.output(self.filename))
    def output(self):
        """Return targets for the labeled metadata and label statistics CSVs.

        The list order is ``labeled_metadata.csv`` first, then
        ``label_statistics.csv``; both names are resolved via ``paths.output``.
        """
        metadata_path = paths.output('labeled_metadata.csv')
        statistics_path = paths.output('label_statistics.csv')
        return [
            luigi.LocalTarget(metadata_path),
            luigi.LocalTarget(statistics_path),
        ]
    def output(self):
        """Return a single-element list holding the ``body_site.csv`` target.

        The file name is resolved through ``paths.output``. A list is returned
        even though there is only one target.
        """
        resolved = paths.output('body_site.csv')
        return [luigi.LocalTarget(resolved)]
 def output(self):
     """Return the local target for ``<self.target>_training_data.pkl``.

     The file name is resolved through ``paths.output``.
     """
     resolved = paths.output("{}_training_data.pkl".format(self.target))
     return luigi.LocalTarget(resolved)
Пример #11
0
 def output(self):
     """Return a one-element list with the combined metrics CSV target.

     The file name ``<self.target>_combined_metrics.csv`` is resolved through
     ``paths.output`` with the ``'metrics'`` argument.
     """
     file_name = "{}_combined_metrics.csv".format(self.target)
     resolved = paths.output(file_name, 'metrics')
     return [luigi.LocalTarget(resolved)]
Пример #12
0
 def output(self):
     """Return the local target for ``sentences_<self.use_value>.cor``.

     The file name is resolved through ``paths.output``.
     """
     resolved = paths.output("sentences_{}.cor".format(self.use_value))
     return luigi.LocalTarget(resolved)
Пример #13
0
 def output(self):
     """Return the local target for the pickle named after this task's settings.

     The file name embeds ``self.use_value``, ``self.min_count``, ``self.size``
     and ``self.epochs`` (in that order) and is resolved via ``paths.output``.
     """
     settings = (self.use_value, self.min_count, self.size, self.epochs)
     file_name = "biom_dim_w2v_{}_{}_{}_{}.pkl".format(*settings)
     return luigi.LocalTarget(paths.output(file_name))
Пример #14
0
    def output(self):
        """Return targets for ``sample_id_to_tax.csv`` and ``hyperbolic_df.pkl``.

        Both file names are resolved through ``paths.output`` and returned, in
        that order, as ``luigi.LocalTarget`` objects.
        """
        resolved = [
            paths.output(file_name)
            for file_name in ('sample_id_to_tax.csv', 'hyperbolic_df.pkl')
        ]
        return [luigi.LocalTarget(location) for location in resolved]