def cluster_count_matrix(config_file, lane_id, strain_fmt_string, cond_fmt_string):
    """Cluster the log2-transformed count matrix of one lane and save it.

    Args:
        config_file: Path handed to ``cfp.parse`` to obtain the config params.
        lane_id: Identifier of the lane whose dumped count matrix is loaded.
        strain_fmt_string: Format string forwarded to ``customize_strains``.
        cond_fmt_string: Format string forwarded to ``customize_conditions``.

    Returns:
        The filename passed to ``record.save`` on success, or ``None`` when
        the dumped count matrix for ``lane_id`` could not be read.
    """
    config_params = cfp.parse(config_file)
    # Only the sample detection limit is needed for thresholding here.
    sample_detection_limit, _control_detection_limit = get_detection_limits(config_params)

    # If the file does not exist, then do not attempt to cluster it!
    try:
        genes, conditions, matrix = load_dumped_count_matrix(config_params, lane_id)
    except IOError:
        print("could not find '{}' count matrix".format(lane_id))
        return None

    # Raise every count below the detection limit up to the limit before
    # taking logs. np.maximum builds a new array, so the loaded matrix is not
    # modified in place and a fractional limit is not truncated by an integer
    # matrix dtype.
    thresholded_matrix = np.maximum(matrix, sample_detection_limit)
    logged_matrix = np.log2(thresholded_matrix)

    # Customize the strain and condition names for interpretable visualization!
    # The tables are looked up the same way cluster_zscore_matrix does, so the
    # customize_* helpers receive tables rather than the raw config params.
    strain_table = get_barcode_table(config_params)
    sample_table = get_sample_table(config_params)
    custom_genes = customize_strains(genes, strain_table, strain_fmt_string)
    custom_conditions = customize_conditions(conditions, sample_table, cond_fmt_string)

    dataset = [custom_genes, custom_conditions, logged_matrix]
    record, rows_tree, cols_tree = clus.cluster(dataset)

    f = get_clustered_count_matrix_filename(config_params, lane_id)
    record.save(f, rows_tree, cols_tree)

    # Return the filename so callers can collect the saved clustergram files,
    # matching what the other clustering wrappers in this file return.
    return f
def cluster_zscore_matrix(config_file, lane_id, strain_fmt_string, cond_fmt_string):
    """Cluster the dumped z-score matrix of one lane and save the result.

    Args:
        config_file: Path handed to ``cfp.parse`` to obtain the config params.
        lane_id: Identifier of the lane whose dumped z-score matrix is loaded.
        strain_fmt_string: Format string forwarded to ``customize_strains``.
        cond_fmt_string: Format string forwarded to ``customize_conditions``.

    Returns:
        The filename passed to ``record.save``, or ``None`` when the dumped
        z-score matrix for ``lane_id`` could not be read.
    """
    params = cfp.parse(config_file)

    # Nothing to cluster when the dumped matrix is missing.
    try:
        loaded = load_dumped_zscore_matrix(params, lane_id)
    except IOError:
        print("could not find '{}' zscore matrix".format(lane_id))
        return None
    strains, conds, zscores = loaded

    # Relabel strains and conditions so the clustergram is readable.
    barcode_table = get_barcode_table(params)
    sample_table = get_sample_table(params)
    labelled_strains = customize_strains(strains, barcode_table, strain_fmt_string)
    labelled_conds = customize_conditions(conds, sample_table, cond_fmt_string)

    record, rows_tree, cols_tree = clus.cluster(
        [labelled_strains, labelled_conds, zscores]
    )

    out_name = get_clustered_zscore_matrix_filename(params, lane_id)
    record.save(out_name, rows_tree, cols_tree)

    # Hand back the filename so the cdt/atr/gtr files can be copied into the
    # shared clustergram directory and eventually tarred/gzipped.
    return out_name
def cluster_one_stacked_matrix(dataset, matrix_id, strain_table, sample_table,
                               strain_fmt_string, cond_fmt_string, output_folder,
                               new_matrix=None, verbosity=1):
    """Relabel, cluster and save one already-loaded dataset.

    Args:
        dataset: Three-item sequence unpacked as (genes, conditions, matrix).
        matrix_id: Name joined onto ``output_folder`` to form the output base.
        strain_table: Table forwarded to ``customize_strains``.
        sample_table: Table forwarded to ``customize_conditions``.
        strain_fmt_string: Format string forwarded to ``customize_strains``.
        cond_fmt_string: Format string forwarded to ``customize_conditions``.
        output_folder: Directory component of the output file base.
        new_matrix: Forwarded unchanged to ``clus.cluster``.
        verbosity: Forwarded to both ``customize_*`` helpers.

    Returns:
        The output file base, ``os.path.join(output_folder, matrix_id)``.
    """
    strains, conds, values = dataset

    # Relabel strains and conditions so the clustergram is readable.
    labelled_strains = customize_strains(
        strains, strain_table, strain_fmt_string, verbosity=verbosity
    )
    labelled_conds = customize_conditions(
        conds, sample_table, cond_fmt_string, verbosity=verbosity
    )
    labelled_dataset = [labelled_strains, labelled_conds, values]

    out_base = os.path.join(output_folder, matrix_id)
    record, rows_tree, cols_tree = clus.cluster(
        labelled_dataset, file_base=out_base, new_matrix=new_matrix
    )
    record.save(out_base, rows_tree, cols_tree)

    # Hand back the file base so the cdt/atr/gtr files can be copied into the
    # shared clustergram directory and eventually tarred/gzipped.
    return out_base