def main(samples):
    """Compute per-sample statistics from stored gene counts and plot them.

    Loads the gene count matrix and the gene list through the module's
    loader helpers, passes every gene name through ``clean_ensg_name``,
    computes statistics with ``calc_stats`` and hands the result to
    ``output_matplotlib`` for the 'delta-lin41' and 'lin41-gran' keys.

    Args:
        samples: Sample description forwarded to ``each_replicate`` and
            ``calc_stats``.
    """
    counts = load_gene_count_matrix()
    replicate_positions = indices_dict(each_replicate(samples))
    cleaned_gene_names = [clean_ensg_name(name) for name in load_gene_list()]

    stats = calc_stats(samples, replicate_positions, counts)

    # Table output is currently disabled:
    # output_tables(samples, stats, cleaned_gene_names)
    output_matplotlib(stats, cleaned_gene_names, 'delta-lin41', 'lin41-gran')
def make_gene_count_matrix(samples, ambiguous_method):
    """Count parsed gene hits per replicate into a genes-by-replicates matrix.

    The annotated SAM files are read twice: once by ``build_gene_indices``
    to assign a row to every gene, and once more (after rewinding) to
    accumulate the counts.

    Args:
        samples: Sample description forwarded to ``open_annot_sam_files``
            and ``each_replicate``.
        ambiguous_method: Forwarded unchanged to ``build_gene_indices`` and
            ``parse_annotated_sam``.
            NOTE(review): presumably selects how ambiguously annotated
            reads are resolved -- confirm against those helpers.

    Returns:
        A tuple ``(gene_count_matrix, gene_list, replicate_list)``.
        ``gene_count_matrix`` is a ``np.uint32`` array of shape
        ``(len(gene_list), len(replicate_list))``; entry ``[row, col]`` is
        the number of times ``parse_annotated_sam`` yielded the gene
        indexed ``row`` for the replicate indexed ``col``.
    """
    annot_sam_files = open_annot_sam_files(samples)
    # Close the files on every exit path, including errors raised while
    # indexing or parsing, so that file handles are never leaked.
    try:
        (gene_list, gene_indices) = build_gene_indices(
            annot_sam_files, ambiguous_method)
        # The indexing pass above has read the files; start over for counting.
        rewind_files(annot_sam_files)

        replicate_list = list(each_replicate(samples))
        replicate_indices = indices_dict(replicate_list)
        gene_count_matrix = np.zeros(
            (len(gene_list), len(replicate_list)), np.uint32)

        # items() rather than iteritems() so this runs on Python 2 and 3.
        for (replicate, col) in replicate_indices.items():
            annot_sam_file = annot_sam_files[replicate]
            for gene in parse_annotated_sam(annot_sam_file, ambiguous_method):
                # Genes that were not assigned a row are not counted.
                if gene not in gene_indices:
                    continue
                row = gene_indices[gene]
                gene_count_matrix[row, col] += 1
    finally:
        close_files(annot_sam_files)

    return (gene_count_matrix, gene_list, replicate_list)
def build_gene_indices(annot_sam_files, ambiguous_method):
    """Build the ordered gene list and its index lookup.

    Collects genes with ``build_all_genes_set``, orders them with
    ``sorted_list`` and derives the lookup with ``indices_dict``.

    Args:
        annot_sam_files: Forwarded to ``build_all_genes_set``.
        ambiguous_method: Forwarded to ``build_all_genes_set``.

    Returns:
        A tuple ``(gene_list, gene_indices)``.
    """
    ordered_genes = sorted_list(
        build_all_genes_set(annot_sam_files, ambiguous_method))
    return (ordered_genes, indices_dict(ordered_genes))