accs = np.zeros((2, len(num_cluster)))
for i in range(len(num_cluster)):
    k = num_cluster[i]
    print('Iteration {0}, num-cluster={1}'.format(i, k))

    # --------------------------------------------------
    # 3.1. SETUP SOURCE DATA NMF CLUSTERING
    # --------------------------------------------------
    if labels is None:
        # No source labels are provided, so generate them via NMF clustering
        nmf_labels = NmfClustering(data, gene_ids, num_cluster=k, labels=[])
        nmf_labels.add_cell_filter(cell_filter_fun)
        nmf_labels.add_gene_filter(gene_filter_fun)
        nmf_labels.set_data_transformation(data_transf_fun)
        nmf_labels.apply(k=k, alpha=arguments.nmf_alpha, l1=arguments.nmf_l1,
                         max_iter=arguments.nmf_max_iter, rel_err=arguments.nmf_rel_err)
        labels = nmf_labels.cluster_labels

    # Use the true number of latent states for NMF and SC3
    src_labels = np.array(labels, dtype=int)
    src_lbl_set = np.unique(src_labels)
    k_now = src_lbl_set.size

    nmf = NmfClustering_initW(data, gene_ids, labels=labels, num_cluster=k_now)
    nmf.add_cell_filter(cell_filter_fun)
    nmf.add_gene_filter(gene_filter_fun)
    nmf.set_data_transformation(data_transf_fun)
    nmf.apply(k=k_now, alpha=arguments.nmf_alpha, l1=arguments.nmf_l1,
              max_iter=arguments.nmf_max_iter, rel_err=arguments.nmf_rel_err)
    # --------------------------------------------------
                     perc_consensus_genes=perc_consensus_genes,
                     non_zero_threshold=non_zero_threshold)
data_transf_fun = sc.data_transformation_log2

# Generate labels from the complete dataset
print("Train complete data")
complete_nmf = NmfClustering(data, np.arange(data.shape[0]), num_cluster=num_cluster)
complete_nmf.add_cell_filter(cell_filter_fun)
complete_nmf.add_gene_filter(gene_filter_fun)
complete_nmf.set_data_transformation(data_transf_fun)
complete_nmf.apply(k=num_cluster, alpha=nmf_alpha, l1=nmf_l1,
                   max_iter=nmf_max_iter, rel_err=nmf_rel_err)

# Get labels
desc, target_nmf, trg_lbls_pred, mixed_data = method_sc3_filter(
    complete_nmf, data, [], cell_filter=cell_filter_fun, gene_filter=gene_filter_fun,
    transformation=data_transf_fun, mix=0.0, metric='euclidean',
    use_da_dists=False, n_trg_cluster=num_cluster)
labels = trg_lbls_pred
label_names, label_counts = np.unique(labels, return_counts=True)
data_src = data_transformation_log2(data_src)

# Load target data
data_trg = np.loadtxt(path_trg)
gene_ids_trg = np.loadtxt(path_geneids_trg, dtype=str)

# Delete non-unique genes
data_trg, gene_ids_trg = delete_nonunique_genes(data_trg, gene_ids_trg)

# Apply cell filter
valid_cells = cell_filter(data_trg)
# Apply gene filter
valid_genes = gene_filter(data_trg)

# Create filtered data
data_trg = data_trg[:, valid_cells]
data_trg = data_trg[valid_genes, :]
gene_ids_trg = gene_ids_trg[valid_genes]

# Log-transform data
data_trg = data_transformation_log2(data_trg)

# Train on source data and test transfer performance on target data
source_nmf = NmfClustering(data_src, gene_ids_src, num_cluster=n_source_cluster)
source_nmf.apply(k=n_source_cluster, max_iter=100, rel_err=1e-3)

# Number of repetitions can be changed in line 153 of utils.py
target_nmf = DaNmfClustering(source_nmf, data_trg.copy(), gene_ids_trg, num_cluster=n_target_cluster)
target_nmf.apply(k=n_target_cluster, calc_transferability=True)
# target_nmf.transferability_pvalue

# np.savez(fname, source_ari=source_ari, target_ari=target_ari, n_mix=n_mix,
#          n_source=n_source, n_target=n_target, n_source_cluster=n_source_cluster,
#          n_target_cluster=n_target_cluster)
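
# Hypothetical helper (not part of the original script): compute the adjusted Rand
# index that the commented np.savez call above would store as source_ari/target_ari.
# Using scikit-learn for the metric and the variable names below are assumptions.
from sklearn.metrics import adjusted_rand_score

def cluster_ari(labels_true, labels_pred):
    # 1.0 for a perfect match, close to 0.0 for random label assignments.
    return adjusted_rand_score(labels_true, labels_pred)

# Example (assumes ground-truth target labels `trg_labels_true` are available and
# that target_nmf exposes cluster_labels after apply(), as NmfClustering does above):
# target_ari = cluster_ari(trg_labels_true, target_nmf.cluster_labels)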
def method_nmf(src, src_labels, trg, trg_labels, n_src_cluster, n_trg_cluster):
    # Baseline: cluster the target data alone with NMF. The source data and labels
    # are ignored; they are kept in the signature for a uniform method interface.
    ids = np.arange(trg.shape[0])
    cp = NmfClustering(trg, ids, num_cluster=n_trg_cluster)
    cp.apply(k=n_trg_cluster)
    return 'NMF', cp.cluster_labels, None
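
# Minimal usage sketch for method_nmf (an illustration, not part of the original
# script). It assumes NmfClustering is imported in this module, that matrices are
# laid out genes x cells as in the snippets above, and that a small random count
# matrix is enough to exercise the clustering; the toy setup below is an assumption.
import numpy as np

if __name__ == '__main__':
    rng = np.random.RandomState(0)
    # Toy target matrix: 500 genes x 120 cells of non-negative counts.
    toy_trg = rng.poisson(lam=2.0, size=(500, 120)).astype(np.float64)
    desc, pred_labels, _ = method_nmf(src=None, src_labels=None,
                                      trg=toy_trg, trg_labels=None,
                                      n_src_cluster=4, n_trg_cluster=4)
    print(desc, np.unique(pred_labels, return_counts=True))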