Exemplo n.º 1
0
def main(samples):
	"""Compute per-sample statistics and plot them.

	Loads the gene count matrix and the gene list, derives statistics for
	`samples` via `calc_stats`, and passes the result to `output_matplotlib`.

	Args:
		samples: sample collection accepted by `each_replicate` and
			`calc_stats` (exact structure is defined elsewhere).
	"""
	replicate_indices = indices_dict(each_replicate(samples))
	gene_count_matrix = load_gene_count_matrix()
	# Build a concrete list: on Python 3 a bare map() would be a one-shot
	# iterator, which breaks any consumer that iterates or indexes it twice.
	gene_list = [clean_ensg_name(gene) for gene in load_gene_list()]
	sample_stats = calc_stats(samples, replicate_indices, gene_count_matrix)
	# Table output is currently disabled; only the plot is produced.
	#output_tables(samples, sample_stats, gene_list)
	# NOTE(review): the two string arguments are presumably the names of the
	# samples being compared -- confirm against output_matplotlib.
	output_matplotlib(sample_stats, gene_list, 'delta-lin41', 'lin41-gran')
Exemplo n.º 2
0
def make_gene_count_matrix(samples, ambiguous_method):
	"""Build a genes x replicates matrix of annotated-read counts.

	Every gene yielded by `parse_annotated_sam` for a replicate's file
	increments that gene's cell in the replicate's column. Genes missing
	from the index built by `build_gene_indices` are ignored.

	Args:
		samples: sample collection accepted by `open_annot_sam_files` and
			`each_replicate`.
		ambiguous_method: passed through unchanged to `build_gene_indices`
			and `parse_annotated_sam`.

	Returns:
		A tuple `(gene_count_matrix, gene_list, replicate_list)` where
		`gene_count_matrix` is a `np.uint32` array of shape
		`(len(gene_list), len(replicate_list))`.
	"""
	annot_sam_files = open_annot_sam_files(samples)
	# Guarantee the files are closed even if parsing or indexing fails.
	try:
		(gene_list, gene_indices) = build_gene_indices(annot_sam_files, ambiguous_method)
		# Presumably the files were read while collecting the gene names, so
		# reset them before the counting pass.
		rewind_files(annot_sam_files)

		replicate_list = list(each_replicate(samples))
		replicate_indices = indices_dict(replicate_list)

		gene_count_matrix = np.zeros((len(gene_list), len(replicate_list)), np.uint32)
		# items() works on both Python 2 and 3 (iteritems() is Python 2 only).
		for (replicate, col) in replicate_indices.items():
			annot_sam_file = annot_sam_files[replicate]
			for gene in parse_annotated_sam(annot_sam_file, ambiguous_method):
				if gene not in gene_indices:
					continue
				row = gene_indices[gene]
				gene_count_matrix[row, col] += 1
	finally:
		close_files(annot_sam_files)

	return (gene_count_matrix, gene_list, replicate_list)
Exemplo n.º 3
0
def build_gene_indices(annot_sam_files, ambiguous_method):
	"""Return the gene list and its lookup table for the given files.

	The gene set produced by `build_all_genes_set` is turned into a list
	with `sorted_list`, and `indices_dict` is applied to that list.

	Returns:
		A tuple `(gene_list, gene_indices)`.
	"""
	ordered_genes = sorted_list(
		build_all_genes_set(annot_sam_files, ambiguous_method)
	)
	return (ordered_genes, indices_dict(ordered_genes))