def main(data: utils.URLPath, kfold_dir: utils.URLPath, output: utils.URLPath):
    """Evaluate every k-fold model against its validation split and plot ROC curves.

    Args:
        data: Path to a SOM dataset directory (loaded via SOMDataset.from_path).
        kfold_dir: Directory with one subdirectory per trained fold model.
        output: Directory where per-fold ROC results and the mean ROC are written.
    """
    dataset = som_dataset.SOMDataset.from_path(data)

    # Each immediate subdirectory of kfold_dir holds one fold's saved model.
    # (renamed from `dir`, which shadowed the builtin)
    fold_names = next(os.walk(kfold_dir))[1]
    model_paths = [utils.URLPath(os.path.join(kfold_dir, name)) for name in fold_names]

    aucs = []
    curves = []
    for i, model_path in enumerate(model_paths):
        print(model_path)
        model = SOMClassifier.load(model_path)
        validate = model.get_validation_data(dataset)

        # Skip folds whose validation split is missing one of the configured
        # groups; their ROC results would not be comparable to the other folds.
        if len(validate.group_count.keys()) != len(model.config.groups):
            continue

        val_seq = model.create_sequence(validate)
        # Use a distinct comprehension variable so the fold index `i` used in
        # the output path below is not visually confused with it.
        trues = np.concatenate([val_seq[k][1] for k in range(len(val_seq))])
        preds = np.array([p for p in model.model.predict_generator(val_seq)])
        auc, curve = create_roc_results(trues, preds, output / f"roc_n{i}", model)
        aucs.append(auc)
        curves.append(curve)

    compute_mean_ROC(curves, output)
def main(
        data: utils.URLPath = None,
        model: utils.URLPath = None,
        preds: utils.URLPath = None,
        output: utils.URLPath = None,
):
    """Run SOM t-SNE visualization for the "unused" cohort.

    NOTE(review): every CLI argument is currently ignored — `data`, `model`
    and `output` are immediately overwritten with hard-coded paths from a
    one-off analysis run, and `preds` is never read. Confirm before reusing
    this entry point with real arguments.
    """
    data = utils.URLPath("/data/flowcat-data/paper-cytometry/som/unused")
    dataset = io_functions.load_case_collection(data, data + ".json.gz")
    output = utils.URLPath("teststuff_unused_style")
    output.mkdir()
    model = SOMClassifier.load(utils.URLPath("/data/flowcat-data/paper-cytometry/classifier"))
    som_tsne(dataset, model, output)
def load(cls, path: str = None, ref_path: str = None, cls_path: str = None):
    """Load classifier from the given path, alternatively give a separate path for reference and classifier."""
    if path is not None:
        # A single base path implies the conventional sub-layout.
        base = utils.URLPath(path)
        ref_path = base / "reference"
        cls_path = base / "classifier"
    else:
        if ref_path is None or cls_path is None:
            raise ValueError(
                "Either path or ref_path and cls_path need to be set.")
        ref_path = utils.URLPath(ref_path)
        cls_path = utils.URLPath(cls_path)
    reference = io_functions.load_casesom(ref_path)
    classifier = SOMClassifier.load(cls_path)
    saliency = SOMSaliency.load(cls_path)
    return cls(reference, classifier, saliency)
def train_som_classifier(
        train_dataset: "CaseCollection",
        validate_dataset: "CaseCollection",
        config: SOMClassifierConfig = None,
        class_weights=None,
        model_fun: "Callable" = create_model_multi_input,
) -> "SOMClassifier":
    """Configure the dataset based on config and train a given model."""
    classifier = SOMClassifier(config)
    classifier.create_model(model_fun)
    train_seq = classifier.create_sequence(train_dataset, config.train_batch_size)
    # No validation dataset means training runs without a validation sequence.
    valid_seq = (
        classifier.create_sequence(validate_dataset, config.valid_batch_size)
        if validate_dataset is not None
        else None
    )
    classifier.train_generator(
        train_seq,
        valid_seq,
        epochs=config.train_epochs,
        class_weight=class_weights)
    return classifier
def run_transfer(options, train_dataset, validate_dataset):
    """Fine-tune a transfer-learning model, then persist predictions, metrics and the model."""
    config = options["config"]
    base_model = models.load_model(options["base_model_path"])
    model = SOMClassifier(config, create_tl_model(base_model, config))

    train_seq = model.create_sequence(train_dataset, config.train_batch_size)
    if validate_dataset is None:
        valid_seq = None
    else:
        valid_seq = model.create_sequence(validate_dataset, config.valid_batch_size)

    model.train_generator(train_seq, valid_seq, epochs=config.train_epochs, class_weight=None)

    output = utils.URLPath(options["output_path"])
    if valid_seq:
        _report_validation(model, valid_seq, config, output)
    model.save(output)
    model.save_information(output)
    # Free the TF graph/session memory before the next run.
    keras.backend.clear_session()
    del model


def _report_validation(model, valid_seq, config, output):
    """Write prediction CSV/JSON and classification metrics for the validation split."""
    pred_arr, pred_labels = model.predict_generator(valid_seq)
    true_labels = valid_seq.true_labels
    pred_df = pd.DataFrame(
        pred_arr,
        columns=valid_seq.binarizer.classes_,
        index=valid_seq.dataset.labels)
    io_functions.save_csv(pred_df, output / "preds.csv")
    io_functions.save_json(
        {"true": list(true_labels), "pred": list(pred_labels)},
        output / "preds_labels.json")
    generate_all_metrics(true_labels, pred_labels, config.mapping, output)
def main(data: utils.URLPath, model: utils.URLPath, output: utils.URLPath):
    """Evaluate a trained SOM classifier and generate embedding visualizations.

    Produces ROC and threshold results on the validation split, then UMAP and
    t-SNE embeddings of (a) an intermediate layer's activations and (b) raw
    SOM data, for both the validation cases and an "unknown cohorts" dataset
    loaded from a hard-coded path.
    """
    dataset = io_functions.load_case_collection(data, data + ".json")
    dataset.set_data_path(utils.URLPath(""))
    model = SOMClassifier.load(model)
    validate = model.get_validation_data(dataset)
    val_seq = model.create_sequence(validate)
    # Stack per-batch label arrays / prediction rows into single arrays.
    trues = np.concatenate([val_seq[i][1] for i in range(len(val_seq))])
    preds = np.array([p for p in model.model.predict_generator(val_seq)])
    create_roc_results(trues, preds, output / "roc", model)
    create_threshold_results(trues, preds, output / "threshold", model)

    # tsne of result vectors
    embedding_path = output / "embedding-preds"
    embedding_path.mkdir()
    pred_labels = val_seq.true_labels
    groups = model.config["groups"]
    # Move "normal" to the front of the group list (mutates the config list
    # in place via remove — NOTE(review): confirm config is not reused later).
    groups.remove("normal")
    groups = ["normal", *groups]
    # Extra cohorts only present in the unknown dataset, appended for plotting.
    all_groups = groups + ["AML", "MM", "HCLv"]
    colors = sns.cubehelix_palette(len(all_groups), rot=4, dark=0.30)
    perplexity = 50

    # tsne of intermediate layers
    intermediate_model = keras.Model(
        inputs=model.model.input,
        outputs=model.model.get_layer("concatenate_1").output)
    intermed_preds = np.array(
        [p for p in intermediate_model.predict_generator(val_seq)])

    # unknown data
    udata = utils.URLPath("output/unknown-cohorts-processing/som/som")
    udataset = io_functions.load_case_collection(udata, udata + ".json")
    udataset.set_data_path(utils.URLPath(""))
    un_seq = model.create_sequence(udataset)
    intermed_upreds = np.array(
        [p for p in intermediate_model.predict_generator(un_seq)])
    # Combine known + unknown activations so both appear in one embedding.
    all_intermed = np.concatenate((intermed_preds, intermed_upreds))
    all_labels = pred_labels + un_seq.true_labels
    umap_inter_all = UMAP(n_neighbors=30).fit_transform(all_intermed)
    plot_embedded(umap_inter_all, all_labels, all_groups, colors=colors).savefig(str(embedding_path / f"umap_intermediate_all.png"), dpi=300)
    tsne_inter_all = manifold.TSNE(
        perplexity=perplexity).fit_transform(all_intermed)
    plot_embedded(
        tsne_inter_all, all_labels, all_groups, colors=colors).savefig(str(
            embedding_path / f"tsne_intermediate_all_p{perplexity}.png"), dpi=300)

    # create som tsne for known and unknown data
    all_cases = validate.cases + udataset.cases
    case_data = []
    for case in all_cases:
        # Flatten each case's per-tube SOM grids into one feature vector.
        # NOTE(review): assumes tube SOM data share compatible shapes on
        # axes 0/1 so they can be concatenated on axis=2 — confirm upstream.
        somdata = np.concatenate([
            case.get_tube(tube, kind="som").get_data().data
            for tube in model.config["tubes"]
        ], axis=2).flatten()
        case_data.append(somdata)
    case_data = np.array(case_data)
    perplexity = 50
    umap_som_all = UMAP(n_neighbors=30).fit_transform(case_data)
    plot_embedded(umap_som_all, all_labels, all_groups, colors=colors).savefig(
        str(embedding_path / f"umap_som_all.png"), dpi=300)
    tsne_som_all = manifold.TSNE(
        perplexity=perplexity).fit_transform(case_data)
    plot_embedded(tsne_som_all, all_labels, all_groups, colors=colors).savefig(
        str(embedding_path / f"tsne_som_all_p{perplexity}.png"), dpi=300)

    # plot legend
    # Saved separately so plots themselves stay uncluttered.
    fig = plt.figure()
    patches = [
        mpl.patches.Patch(color=color, label=group)
        for group, color in zip(all_groups, colors)
    ]
    fig.legend(patches, all_groups, loc='center', frameon=False)
    fig.savefig(str(embedding_path / "legend.png"), dpi=300)
def main(args):
    """Fine-tune a pre-trained SOM classifier on a panel-specific dataset.

    Expects `args` to provide: input (SOM dataset path), val / train (JSON
    label lists), output (save directory), panel (MLL / ERLANGEN / other),
    basemodel (directory containing model.h5) and bal (per-group balance count).
    """
    dataset = som_dataset.SOMDataset.from_path(args.input)
    val = args.val
    train = args.train
    OUTPUT = args.output
    PANEL = args.panel
    basemodel = args.basemodel
    bal = args.bal

    # set the groups according to the panel
    if PANEL == "MLL":
        groups = GROUPS
    elif PANEL == "ERLANGEN":
        groups = ["CLL", "MBL", "MCL", "LPL", "MZL", "FL", "HCL", "normal"]
    else:
        groups = ["CLL", "MCL", "LPL", "MZL", "FL", "HCL", "normal"]

    # FIX: ("1") is just the string "1", not a tuple — a one-element tuple
    # needs a trailing comma. Iterating either yields the same single "1"
    # key, so observable behavior is unchanged; the intent is now explicit.
    tubes = ("1",)
    mapping = None
    # Same balance target for every group.
    balance = {key: bal for key in groups}

    config = classifier.SOMClassifierConfig(**{
        "tubes": {tube: dataset.config[tube] for tube in tubes},
        "groups": groups,
        "pad_width": 2,
        "mapping": mapping,
        "cost_matrix": None,
        "train_epochs": 15,
    })
    val = io_functions.load_json(val)
    validate_dataset = dataset.filter(labels=val)
    labels = io_functions.load_json(train)
    train_dataset = dataset.filter(labels=labels)
    train_dataset, validate_dataset = fc_api.prepare_classifier_train_dataset(
        train_dataset,
        split_ratio=0.9,
        groups=groups,
        mapping=mapping,
        balance=balance,
        val_dataset=validate_dataset)
    print(train_dataset.group_count)
    print(validate_dataset.group_count)

    # load base model and get weights
    base_model = models.load_model(str(basemodel / "model.h5"))
    weights = base_model.get_weights()

    # create model with the base model's weights as the starting point
    model = create_model(config.inputs, config.output)
    model.set_weights(weights)

    # freeze 2 dense layers: check for each dataset
    model.get_layer('dense_1').trainable = False
    model.get_layer('dense_2').trainable = False
    model.compile(loss=config.get_loss(modeldir=None), optimizer="adam", metrics=["accuracy"])

    # cast to SOMConfig instance
    model = SOMClassifier(config, model)
    train = model.create_sequence(train_dataset, config.train_batch_size)
    if validate_dataset is not None:
        validate = model.create_sequence(validate_dataset, config.valid_batch_size)
    else:
        validate = None
    model.train_generator(train, validate, epochs=config.train_epochs, class_weight=None)
    model.save(OUTPUT)
    model.save_information(OUTPUT)
# create model model = create_model(config.inputs, config.output) model.set_weights(weights) # freeze 2 dense layers: check for each dataset model.get_layer('dense_1').trainable = False model.get_layer('dense_2').trainable = False model.compile(loss=config.get_loss(modeldir=None), optimizer="adam", metrics=["accuracy"]) # cast to SOMConfig instance model = SOMClassifier(config, model) train = model.create_sequence(train_dataset, config.train_batch_size) if validate_dataset is not None: validate = model.create_sequence(validate_dataset, config.valid_batch_size) else: validate = None model.train_generator(train, validate, epochs=config.train_epochs, class_weight=None) model.save(OUTPUT)