Пример #1
0
        'rawdata_number.json').st_size:
    sample_number_dict = python_tools.load_fn_to_obj('rawdata_number.json')
else:
    sample_number_dict = {}

# Write a tab-separated report listing every paired FASTQ file of every
# sample with its size, hand each sample's command line to
# python_tools.write_obj_to_file, then persist the per-sample file counts.
total_size = []
# The context manager closes (and flushes) the report even when os.stat()
# or one of the helper calls raises part-way through; with a bare
# open()/close() pair the handle leaked in that case.
with open(cp_data_info_file, 'w') as cp_data_info:
    for each in sample_dict:
        sample = sample_dict[each]
        if each in sample_number_dict:
            # A count is already recorded for this sample: expose it on the
            # sample object before add_sup_data() is called.
            sample.pre_num = sample_number_dict[each]
        else:
            # First time this sample is seen: remember how many read1 files
            # it has.
            sample_number_dict[each] = len(sample.read1)
        cmd_line = sample.add_sup_data(args.out_dir)
        for n, read1_fq in enumerate(sample.read1):
            # Index read2 explicitly (rather than zip) so a missing mate
            # file still fails loudly with IndexError.
            read2_fq = sample.read2[n]
            # File sizes in units of 1024**3 bytes, reported with a 'G'
            # suffix.
            read1_fq_size = os.stat(read1_fq).st_size / float(1024**3)
            read2_fq_size = os.stat(read2_fq).st_size / float(1024**3)
            total_size.extend([read1_fq_size, read2_fq_size])
            read1_fq_size_out = round(read1_fq_size, 2)
            read2_fq_size_out = round(read2_fq_size, 2)
            cp_data_info.write('%s\t%s\t%sG\t%s\t%sG\n' %
                               (sample.name, read1_fq, read1_fq_size_out,
                                read2_fq, read2_fq_size_out))
        # NOTE(review): the meaning of the third argument (True) is defined
        # by python_tools.write_obj_to_file -- confirm against that helper.
        python_tools.write_obj_to_file(cmd_line, cp_cmd, True)
    # Summary line: the total is summed from the unrounded sizes and rounded
    # once, so it is not affected by per-file rounding.
    cp_data_info.write('total : %sG' % round(sum(total_size), 2))

# Save the (possibly extended) sample -> file-count mapping.
cp_data_info_json = os.path.join(cwd, 'rawdata_number.json')
python_tools.write_obj_to_json(sample_number_dict, cp_data_info_json)
Пример #2
0
import json
import re
from os import path
import sys

script_path = path.dirname(path.abspath(__file__))
RNAseq_lib_path = path.join(script_path, '..')
sys.path.insert(0, RNAseq_lib_path)
from RNAseq_lib import KEGG_ORGANISM_TXT
from RNAseq_lib import KEGG_ORGANISM_JSON
from python_tools import write_obj_to_json

# Build a mapping from a normalised organism name (lower case, spaces
# replaced by underscores) to the value in the second column of
# KEGG_ORGANISM_TXT, then dump it to KEGG_ORGANISM_JSON.
kegg_name_map_dict = {}
# Greedy capture of everything before the last '(' in the name column.
before_paren = re.compile(r'(.*)\(')
with open(KEGG_ORGANISM_TXT) as organism_table:
    for record in organism_table:
        columns = record.rstrip().split('\t')
        organism_code = columns[1]
        name_field = columns[2]
        if '(' in name_field:
            # Drop the parenthesised suffix and any padding left around the
            # remaining name.
            cleaned = before_paren.match(name_field).group(1).lower().strip()
        else:
            cleaned = name_field.lower()
        kegg_name_map_dict[cleaned.replace(' ', '_')] = organism_code

write_obj_to_json(kegg_name_map_dict, KEGG_ORGANISM_JSON)
Пример #3
0
                group_dict[each_group][1].append(each_group_exp_list)
            non_rep_group_list, non_rep_sample_list, tpm_max = get_group_reproducibility(
                gene_id, group_exp_dict, group_dict, reproducibility_dict)
            all_non_rep_group_list.extend(non_rep_group_list)
            all_non_rep_sample_list.extend(non_rep_sample_list)
            rep_num = group_num - len(non_rep_group_list)
            rep_stat = '%s/%s' % (rep_num, group_num)
            rep_percentage = round(100 * rep_num / float(group_num), 2)
            non_rep_group_out = ','.join(non_rep_group_list)
            non_rep_sample_out = ','.join(non_rep_sample_list)
            gene_rep_status.write(
                '{gene_id}\t{rep_stat}\t{rep_percentage}\t{non_rep_group_out}\t{non_rep_sample_out}\t{tpm_max}\n'
                .format(**locals()))

# Dump group_exp_dict to JSON only when no file exists at that path yet.
if not os.path.exists(group_exp_dict_json):
    python_tools.write_obj_to_json(group_exp_dict, group_exp_dict_json)

# Mean over every value gathered in all_exp_list.
all_exp_mean = numpy.mean(all_exp_list)

# One entry per key of group_exp_dict.
gene_count = len(group_exp_dict)

# Fresh accumulators for the per-cutoff summary that follows.
tpm_breaks = ['Group']
gene_rep_summary_list, tmp_breaks_genes = [], []
group_rep_dict = {}
with open(gene_rep_detail_file, 'w') as gene_rep_detail:
    for n, each_tpm in enumerate(tpm_cutoff):
        non_rep_group = []
        non_rep_sample = []
        if n + 1 < len(tpm_cutoff):
            flag = '%s<TPM<=%s' % (tpm_cutoff[n], tpm_cutoff[n + 1])
        else:
Пример #4
0
# Positional command-line arguments.
gene_interpro_id_file = sys.argv[1]
interpro_map = sys.argv[2]
gene_interpro_des_file = sys.argv[3]

# Load the InterPro mapping: either directly from a file whose name ends in
# 'json', or by parsing a tab-separated table (first column -> second
# column) and writing a '<table>.json' copy next to it.
if interpro_map.endswith('json'):
    interpro_map_dict = python_tools.load_fn_to_obj(interpro_map)
else:
    interpro_map_dict = {}
    with open(interpro_map) as mapping_table:
        for record in mapping_table:
            columns = record.strip().split('\t')
            interpro_map_dict[columns[0]] = columns[1]
    interpro_map_json = '%s.json' % interpro_map
    python_tools.write_obj_to_json(interpro_map_dict, interpro_map_json)

gene_inf_dict = {}
with open(gene_interpro_id_file) as gene_interpro_id:
    for n, eachline in enumerate(gene_interpro_id):
        if n != 0:
            eachline_inf = eachline.strip().split(',')
            gene_id = eachline_inf[0]
            gene_name = eachline_inf[1]
            interpro_id = eachline_inf[2]
            if gene_id not in gene_inf_dict:
                gene_inf_dict[gene_id] = [[], [], []]
            if gene_name != '':
                if gene_name not in gene_inf_dict[gene_id][0]:
                    gene_inf_dict[gene_id][0].append(gene_name)
            if interpro_id != '':