'rawdata_number.json').st_size: sample_number_dict = python_tools.load_fn_to_obj('rawdata_number.json') else: sample_number_dict = {} total_size = [] cp_data_info = open(cp_data_info_file, 'w') for each in sample_dict: if each in sample_number_dict: sample_dict[each].pre_num = sample_number_dict[each] else: sample_number_dict[each] = len(sample_dict[each].read1) cmd_line = sample_dict[each].add_sup_data(args.out_dir) for n, each_fq in enumerate(sample_dict[each].read1): read1_fq = sample_dict[each].read1[n] read2_fq = sample_dict[each].read2[n] read1_fq_size = os.stat(read1_fq).st_size / float(1024**3) read2_fq_size = os.stat(read2_fq).st_size / float(1024**3) total_size.extend([read1_fq_size, read2_fq_size]) read1_fq_size_out = round(read1_fq_size, 2) read2_fq_size_out = round(read2_fq_size, 2) cp_data_info.write('%s\t%s\t%sG\t%s\t%sG\n' % (sample_dict[each].name, read1_fq, read1_fq_size_out, read2_fq, read2_fq_size_out)) python_tools.write_obj_to_file(cmd_line, cp_cmd, True) cp_data_info.write('total : %sG' % round(sum(total_size), 2)) cp_data_info.close() cp_data_info_json = os.path.join(cwd, 'rawdata_number.json') python_tools.write_obj_to_json(sample_number_dict, cp_data_info_json)
# Build a lookup table from normalised latin species names to KEGG organism
# codes and store it as JSON.
#
# Input : KEGG_ORGANISM_TXT, a tab-separated table. The second column is used
#         as the KEGG species code and the third column as the latin name,
#         optionally followed by parenthesised annotations such as a common
#         name.
# Output: KEGG_ORGANISM_JSON, a {latin_name_key: kegg_code} mapping where the
#         key is the lower-cased latin name with spaces replaced by '_'.
import json
import re
from os import path
import sys

# RNAseq_lib and python_tools are resolved relative to this script's parent
# directory, so that directory must be on sys.path before they are imported.
script_path = path.dirname(path.abspath(__file__))
RNAseq_lib_path = path.join(script_path, '..')
sys.path.insert(0, RNAseq_lib_path)
from RNAseq_lib import KEGG_ORGANISM_TXT
from RNAseq_lib import KEGG_ORGANISM_JSON
from python_tools import write_obj_to_json


def _latin_name_key(latin_info):
    """Return the dictionary key for one latin-name description.

    Everything from the FIRST '(' onwards is dropped, so a description with
    several parenthesised annotations still yields the bare latin name
    (a greedy regex would cut at the last '(' and keep the earlier
    annotations in the key). The remainder is stripped, lower-cased and has
    each space replaced by '_'.
    """
    latin_name = latin_info.partition('(')[0]
    return latin_name.strip().lower().replace(' ', '_')


kegg_name_map_dict = {}
with open(KEGG_ORGANISM_TXT) as kegg_organism_txt_inf:
    for eachline in kegg_organism_txt_inf:
        eachline_inf = eachline.rstrip().split('\t')
        # Blank or truncated lines carry no usable species/latin-name pair;
        # skip them instead of failing with an IndexError.
        if len(eachline_inf) < 3:
            continue
        kegg_sp = eachline_inf[1]
        latin_info = eachline_inf[2]
        # When two rows normalise to the same key the later row wins.
        kegg_name_map_dict[_latin_name_key(latin_info)] = kegg_sp

write_obj_to_json(kegg_name_map_dict, KEGG_ORGANISM_JSON)
group_dict[each_group][1].append(each_group_exp_list) non_rep_group_list, non_rep_sample_list, tpm_max = get_group_reproducibility( gene_id, group_exp_dict, group_dict, reproducibility_dict) all_non_rep_group_list.extend(non_rep_group_list) all_non_rep_sample_list.extend(non_rep_sample_list) rep_num = group_num - len(non_rep_group_list) rep_stat = '%s/%s' % (rep_num, group_num) rep_percentage = round(100 * rep_num / float(group_num), 2) non_rep_group_out = ','.join(non_rep_group_list) non_rep_sample_out = ','.join(non_rep_sample_list) gene_rep_status.write( '{gene_id}\t{rep_stat}\t{rep_percentage}\t{non_rep_group_out}\t{non_rep_sample_out}\t{tpm_max}\n' .format(**locals())) if not os.path.exists(group_exp_dict_json): python_tools.write_obj_to_json(group_exp_dict, group_exp_dict_json) all_exp_mean = numpy.mean(all_exp_list) gene_count = len(group_exp_dict.keys()) gene_rep_summary_list = [] tpm_breaks = ['Group'] tmp_breaks_genes = [] group_rep_dict = {} with open(gene_rep_detail_file, 'w') as gene_rep_detail: for n, each_tpm in enumerate(tpm_cutoff): non_rep_group = [] non_rep_sample = [] if n + 1 < len(tpm_cutoff): flag = '%s<TPM<=%s' % (tpm_cutoff[n], tpm_cutoff[n + 1]) else:
gene_interpro_id_file = sys.argv[1] interpro_map = sys.argv[2] gene_interpro_des_file = sys.argv[3] interpro_map_dict = {} if interpro_map.endswith('json'): interpro_map_dict = python_tools.load_fn_to_obj(interpro_map) else: with open(interpro_map) as interpro_map_inf: for eachline in interpro_map_inf: eachline_inf = eachline.strip().split('\t') interpro_map_dict[eachline_inf[0]] = eachline_inf[1] interpro_map_json = '%s.json' % interpro_map python_tools.write_obj_to_json(interpro_map_dict, interpro_map_json) gene_inf_dict = {} with open(gene_interpro_id_file) as gene_interpro_id: for n, eachline in enumerate(gene_interpro_id): if n != 0: eachline_inf = eachline.strip().split(',') gene_id = eachline_inf[0] gene_name = eachline_inf[1] interpro_id = eachline_inf[2] if gene_id not in gene_inf_dict: gene_inf_dict[gene_id] = [[], [], []] if gene_name != '': if gene_name not in gene_inf_dict[gene_id][0]: gene_inf_dict[gene_id][0].append(gene_name) if interpro_id != '':