def output(self):
    """Return one ``luigi.LocalTarget`` per entry of ``self.models``.

    Each target wraps ``paths.output("<model>_pca.csv", 'pca')``; the
    returned list follows the order of ``self.models``.
    """
    csv_names = ["{}_pca.csv".format(model_name) for model_name in self.models]
    resolved = [paths.output(csv_name, 'pca') for csv_name in csv_names]
    return [luigi.LocalTarget(location) for location in resolved]
def run(self):
    """Write a 3-component PCA projection for each model's best training set.

    Reads a metadata CSV from ``self.input()[0]`` and a metrics CSV from
    ``self.input()[1][0]``. For every model in ``self.models``:

    1. select the metrics rows whose ``embedding`` equals the model and pick
       the row with the highest ``test_f1_score``;
    2. load the pickled training data named by that row's
       ``training_data_name`` via ``paths.output(..., 'training_data')``;
    3. drop the ``self.target`` column and fit a 3-component ``PCA``;
    4. join the transformed rows with the ``env_material`` metadata column
       on the index (``merge`` default, i.e. an inner join) and write the
       result as CSV to ``self.output()[i]``.

    The training-data shape and the explained variance ratio of each model
    are printed along the way.
    """
    # Resolve the dependency targets once; they do not change during the run.
    inputs = self.input()

    # ``.path`` instead of ``.fn``: on luigi's LocalTarget ``fn`` is a
    # deprecated alias that only emits a DeprecationWarning and returns
    # ``path``. This also matches how the outputs are addressed below.
    # NOTE(review): assumes the inputs are luigi file-system targets exposing
    # ``.path`` -- confirm against the required tasks.
    metadata = pd.read_csv(inputs[0].path, index_col=0)
    # Cast ids to str before indexing so they compare as strings in the merge.
    metadata['sample_id'] = metadata['sample_id'].astype(str)
    metadata = metadata.set_index('sample_id')
    metadata = metadata[['env_material']]

    metrics = pd.read_csv(inputs[1][0].path)

    # Loop-invariant: the output targets are the same for every iteration.
    targets = self.output()

    for i, model in enumerate(self.models):
        df = metrics[metrics['embedding'] == model]
        # Row with the best test F1 score for this embedding.
        best = df.loc[df['test_f1_score'].idxmax()]
        best_training_data = best['training_data_name']

        training_data_path = paths.output(best_training_data, 'training_data')
        # NOTE: read_pickle executes arbitrary code from the file; only load
        # pickles produced by trusted pipeline steps.
        training_data = pd.read_pickle(training_data_path)
        print('Training data shape', training_data.shape)

        idx = training_data.index
        X = training_data.drop(self.target, axis=1)

        pca = PCA(n_components=3)
        transformed = pd.DataFrame(pca.fit_transform(X), index=idx)
        # Rows without a matching metadata index entry are dropped by the
        # default inner join.
        transformed = transformed.merge(metadata,
                                        left_index=True,
                                        right_index=True)
        print("{} Explained Variance".format(model),
              pca.explained_variance_ratio_)

        output_path = targets[i].path
        transformed.to_csv(output_path)
def output(self):
    """Return the two ``luigi.LocalTarget`` outputs of this task.

    The targets wrap ``paths.output('speciesid_to_tax.csv')`` and
    ``paths.output('taxonomy_97_transitive_closure.csv')``, in that order.
    """
    filenames = ('speciesid_to_tax.csv', 'taxonomy_97_transitive_closure.csv')
    resolved = [paths.output(filename) for filename in filenames]
    return [luigi.LocalTarget(location) for location in resolved]
def output(self):
    """Return three ``luigi.LocalTarget`` objects: model, metrics, data.

    In order, the targets wrap:

    * ``paths.output("<name>_lr_model.pkl", 'model')``
    * ``paths.output("<name>_lr_model_metrics.csv", 'metrics')``
    * ``paths.output(self.training_data_name(), 'training_data')``

    where ``<name>`` is the value of ``self.name()``.
    """
    specs = (
        ("{}_lr_model.pkl".format(self.name()), 'model'),
        ("{}_lr_model_metrics.csv".format(self.name()), 'metrics'),
        (self.training_data_name(), 'training_data'),
    )
    resolved = [paths.output(filename, label) for filename, label in specs]
    return [luigi.LocalTarget(location) for location in resolved]
def output(self):
    """Return a ``luigi.LocalTarget`` wrapping ``paths.output("biom.pkl")``."""
    return luigi.LocalTarget(paths.output("biom.pkl"))
def output(self):
    """Return a ``luigi.LocalTarget`` for ``paths.output("alpha_diversity.pkl")``."""
    target_path = paths.output("alpha_diversity.pkl")
    return luigi.LocalTarget(target_path)
def output(self):
    """Return a ``luigi.LocalTarget`` for ``paths.output(self.filename)``."""
    return luigi.LocalTarget(paths.output(self.filename))
def output(self):
    """Return the two ``luigi.LocalTarget`` outputs of this task.

    The targets wrap ``paths.output('labeled_metadata.csv')`` and
    ``paths.output('label_statistics.csv')``, in that order.
    """
    resolved = [
        paths.output(filename)
        for filename in ('labeled_metadata.csv', 'label_statistics.csv')
    ]
    return [luigi.LocalTarget(location) for location in resolved]
def output(self):
    """Return a one-element list of ``luigi.LocalTarget``.

    The single target wraps ``paths.output('body_site.csv')``.
    """
    location = paths.output('body_site.csv')
    return [luigi.LocalTarget(location)]
def output(self):
    """Return a ``luigi.LocalTarget`` for this target's training data.

    The file name is ``"<target>_training_data.pkl"`` with ``<target>`` taken
    from ``self.target``; it is resolved through ``paths.output``.
    """
    return luigi.LocalTarget(
        paths.output("{}_training_data.pkl".format(self.target))
    )
def output(self):
    """Return a one-element list of ``luigi.LocalTarget``.

    The single target wraps
    ``paths.output("<target>_combined_metrics.csv", 'metrics')`` with
    ``<target>`` taken from ``self.target``.
    """
    csv_name = "{}_combined_metrics.csv".format(self.target)
    location = paths.output(csv_name, 'metrics')
    return [luigi.LocalTarget(location)]
def output(self):
    """Return a ``luigi.LocalTarget`` for the sentences file.

    The file name is ``"sentences_<use_value>.cor"`` with ``<use_value>``
    taken from ``self.use_value``; it is resolved through ``paths.output``.
    """
    return luigi.LocalTarget(
        paths.output("sentences_{}.cor".format(self.use_value))
    )
def output(self):
    """Return a ``luigi.LocalTarget`` whose name encodes the task settings.

    The file name is
    ``"biom_dim_w2v_<use_value>_<min_count>_<size>_<epochs>.pkl"``, filled
    from the same-named attributes of ``self`` and resolved through
    ``paths.output``.
    """
    settings = (self.use_value, self.min_count, self.size, self.epochs)
    pickle_name = "biom_dim_w2v_{}_{}_{}_{}.pkl".format(*settings)
    return luigi.LocalTarget(paths.output(pickle_name))
def output(self):
    """Return the two ``luigi.LocalTarget`` outputs of this task.

    The targets wrap ``paths.output('sample_id_to_tax.csv')`` and
    ``paths.output('hyperbolic_df.pkl')``, in that order.
    """
    filenames = ('sample_id_to_tax.csv', 'hyperbolic_df.pkl')
    resolved = [paths.output(filename) for filename in filenames]
    return [luigi.LocalTarget(location) for location in resolved]