def concatenate_all_the_files(sample): cmds = "cd $sample/accungs_associations; cat *txt>all.txt" cmd_file = "/sternadi/home/volume3/okushnir/Cluster_Scripts/cat_txt.cmd" pbs_jobs.create_pbs_cmd(cmd_file, alias="cat_txt", gmem=3, cmds=cmds, load_python=False) job_id = pbs_jobs.submit("-v sample='%s' %s" % (sample, cmd_file)) print(job_id) status = pbs_jobs.check_pbs(job_id) if status == "Done": print("Done!")
def blast_muation_creator(): cmds = "for sample in 1 4 5 9 16 17 20; do cd /sternadi/home/volume3/okushnir/AccuNGS/20201008RV-202329127/merged/" \ "patients/Patient_${sample}/20201124_q30_consensusX7/; cat mutations_all.txt | grep -v ref_pos > " \ "mutations_all.txt.cropped ; for file in `ls tmp/*.blast`; do cat $file >> all_parts.blast ; done ; " \ "cat all_parts.blast | cut -f1,2,3 > all_parts.blast.cropped ; done" cmd_file = "/sternadi/home/volume3/okushnir/Cluster_Scripts/all_parts.cmd" pbs_jobs.create_pbs_cmd(cmd_file, alias="all_parts", gmem=3, cmds=cmds, load_python=False) job_id = pbs_jobs.submit(cmd_file) status = pbs_jobs.check_pbs(job_id) if status == "Done": print("Done!")
def blast_muation_creator(cmds, sample): cmd_file = "/sternadi/home/volume3/okushnir/Cluster_Scripts/all_parts.cmd" pbs_jobs.create_pbs_cmd(cmd_file, alias="all_parts", gmem=3, cmds=cmds, load_python=False) job_id = pbs_jobs.submit("-v sample='%s' %s" % (sample, cmd_file)) print("-v sample='%s' %s" % (sample, cmd_file)) print(job_id) status = pbs_jobs.check_pbs(job_id) if status == "Done": print("Done!")
def main(args): # def main(): sample = args.sample # patient_no = str(sample).split("/")[-3].split("_")[-1] """1. Create all_parts.blast, all_parts.blast.cropped, mutations_all.txt.cropped""" # cmds = "for sample in 1 4 5 9 16 17 20; do cd /sternadi/home/volume3/okushnir/AccuNGS/20201008RV-202329127/merged/patients/Patient_$sample/20201017_q30_consensusX5/; cat mutations_all.txt | grep -v ref_pos > mutations_all.txt.cropped ; for file in `ls tmp/*.blast`; do cat $file >> all_parts.blast ; done ; cat all_parts.blast | cut -f1,2,3 > all_parts.blast.cropped ; done" # cmd_file = "/sternadi/home/volume3/okushnir/Cluster_Scripts/all_parts.cmd" # pbs_jobs.create_pbs_cmd(cmd_file, alias="all_parts", gmem=3, cmds=cmds, load_python=False) # job_id = pbs_jobs.submit(cmd_file) # status = pbs_jobs.check_pbs(job_id) # if status == "Done": # print("Done!") """2. Run variants_on_same_read.py""" cmds = "base=$sample\n" \ "freqs=`ls ${base} | grep .freqs`\n" \ "echo ${freqs}\n" \ "mkdir ${base}/accungs_associations\n" \ "python /sternadi/home/volume3/okushnir/PhD/variants_on_same_read.py ${base}/all_parts.blast.cropped ${base}/mutations_all.txt.cropped $PBS_ARRAY_INDEX ${base}/${freqs} > ${base}/accungs_associations/$PBS_ARRAY_INDEX.txt" cmd_file = "/sternadi/home/volume3/okushnir/Cluster_Scripts/co_occur.cmd" section_lst = ["2000-3335"] # "336-1999", for jnum in section_lst: cmds = "base=$sample\n" \ "freqs=`ls ${base} | grep freqs`\n" \ "mkdir ${base}/accungs_associations\n" \ "python /sternadi/home/volume3/okushnir/PhD/variants_on_same_read.py ${base}/all_parts.blast.cropped ${base}/mutations_all.txt.cropped $PBS_ARRAY_INDEX ${base}/${freqs} > ${base}/accungs_associations/$PBS_ARRAY_INDEX.txt" cmd_file = "/sternadi/home/volume3/okushnir/Cluster_Scripts/co_occur.cmd" pbs_jobs.create_array_pbs_cmd(cmd_file, jnum, alias="accungs_assoc", gmem=3, cmds=cmds) print("qsub -v sample='%s' %s" % (sample, cmd_file)) job_id = pbs_jobs.submit("-v sample='%s' %s" % (sample, cmd_file)) # print(job_id) job_id = job_id.replace("[]", "") print(job_id) status = pbs_jobs.check_pbs(job_id) if status == "Done": print("Done %s" % (jnum)) print("Done!!!!") """3. Concatenate all the files""" # cmds = "cd $sample/accungs_associations; cat *txt>all.txt" # cmd_file = "/sternadi/home/volume3/okushnir/Cluster_Scripts/cat_txt.cmd" # pbs_jobs.create_pbs_cmd(cmd_file, alias="cat_txt", gmem=3, cmds=cmds, load_python=False) # job_id = pbs_jobs.submit("-v sample='%s' %s" % (sample, cmd_file)) # print(job_id) # status = pbs_jobs.check_pbs(job_id) # if status == "Done": # print("Done!") """4. Run collect_cooccurs and merge it to freqs file"""
def running_variants_on_the_same_read(sample, section_lst): for jnum in section_lst: cmds = "base=$sample\n" \ "freqs=`ls ${base} | grep freqs`\n" \ "mkdir ${base}/accungs_associations\n" \ "python /sternadi/home/volume1/maozgelbart/variants_on_same_read.py ${base}/all_parts.blast.cropped ${base}/mutations_all.txt.cropped $PBS_ARRAY_INDEX ${base}/${freqs} > ${base}/accungs_associations/$PBS_ARRAY_INDEX.txt" cmd_file = "/sternadi/home/volume3/okushnir/Cluster_Scripts/co_occur.cmd" pbs_jobs.create_array_pbs_cmd(cmd_file, jnum, alias="accungs_assoc", gmem=3, cmds=cmds) print("qsub -v sample='%s' %s" % (sample, cmd_file)) job_id = pbs_jobs.submit("-v sample='%s' %s" % (sample, cmd_file)) # print(job_id) job_id = job_id.replace("[]", "") print(job_id) status = pbs_jobs.check_pbs(job_id) if status == "Done": print("Done %s" % (jnum)) print("Done!!!!")
def main(args): virus = args.virus passages = args.passages without_passage = args.without from_passage = int(passages.split("p")[1].split("-")[0]) to_passage = int(passages.split("-p")[1]) no_generations = to_passage input_dir = args.input_dir # directory contains q23_data_mutation.csv - "/Volumes/STERNADILABHOME$/volume3/okushnir/Cirseq/PV/OPV/39mixMOI/" pipline_quality = args.quality mutation_type = args.mutation_type replica_no = args.replica_no mutation_all_at_once = args.mutation_all_at_once fits_dir = input_dir + "fits/" try: os.mkdir(fits_dir) except OSError: print("Creation of the directory %s failed" % fits_dir) else: print("Successfully created the directory %s " % fits_dir) fits_input_dir = fits_dir + "input/" try: os.mkdir(fits_input_dir) except OSError: print("Creation of the directory %s failed" % fits_input_dir) else: print("Successfully created the directory %s " % fits_input_dir) fits_input_pass_dir = fits_input_dir + "%s/" % passages try: os.mkdir(fits_input_pass_dir) except OSError: print("Creation of the directory %s failed" % fits_input_pass_dir) else: print("Successfully created the directory %s " % fits_input_pass_dir) mutation_lst = ["CU", "AG", "AG_adar", "AG_nonadar", "GA", "UC"] fits_input_dir = input_dir + "fits/input/%s/" % passages """1. Create fits dataset from data_mutation.csv file""" print("Creating fits dataset from data_mutation.csv file...") start_pos_dict = {"OPV": 3832, "RVB14": 3635, "CVB3": 3745, "PV": 3833} # start from 2B for all viruses start_position = checkKey(start_pos_dict, virus) fits_data_construction(input_dir, fits_input_pass_dir, from_passage, to_passage, pipline_quality, start_position, replica_no, without_passage) # break the dataset to positions datasets print("breaking the dataset to positions sub-datasets") for mutation in mutation_lst: df_mutation = pd.read_csv(fits_input_pass_dir + "final_%sMutations_sorted_%s.txt" % (mutation_type, mutation), sep="\t") fits_input_pass_mutation_dir = fits_input_pass_dir + "%s" % mutation try: os.mkdir(fits_input_pass_mutation_dir) except OSError: print("Creation of the directory %s failed" % fits_input_pass_mutation_dir) else: print("Successfully created the directory %s " % fits_input_pass_mutation_dir) for position in df_mutation["pos"].iloc[0:-1]: df_pos = df_mutation.groupby(["pos"]).get_group(position) df_pos.to_csv(fits_input_pass_dir + "%s/final_%sMutations_sorted_%s.txt" % (mutation, mutation_type, position), index=False, sep="\t") """2. Run FITS_jobarray_mutation.cmd""" if virus == "OPV": no_generations = (to_passage*2)+1 else: no_generations = to_passage with open(fits_input_dir+"parameters_mutation.txt", "w") as parameter_mutation: parameter_mutation.write("# basic parameters\n" "N 1000000\n" "num_alleles 2\n" "num_generations %s\n" "min_log_mutation_rate0_0 -9\n" "min_log_mutation_rate0_1 -9\n" "min_log_mutation_rate1_0 -9\n" "min_log_mutation_rate1_1 -9\n" "max_log_mutation_rate0_0 -3\n" "max_log_mutation_rate0_1 -3\n" "max_log_mutation_rate1_0 -3\n" "max_log_mutation_rate1_1 -3\n" "# bottleneck\n" "bottleneck_interval 2\n" "bottleneck_size 1000000\n" "fitness_allele0 1.0\n" "fitness_allele1 1.0\n" "num_samples_from_prior 100000\n" "acceptance_rate 0.01" % (no_generations)) try: os.mkdir(input_dir + "fits/output/") except OSError: print("Creation of the directory %s failed" % input_dir + "fits/output/") else: print("Successfully created the directory %s " % input_dir + "fits/output/") try: os.mkdir(input_dir + "fits/output/mutation/") except OSError: print("Creation of the directory %s failed" % input_dir + "fits/output/mutation/") else: print("Successfully created the directory %s " % input_dir + "fits/output/mutation/") fits_mutation_output_dir = input_dir + "fits/output/mutation/%s/" % passages try: os.mkdir(fits_mutation_output_dir) except OSError: print("Creation of the directory %s failed" % fits_mutation_output_dir) else: print("Successfully created the directory %s " % fits_mutation_output_dir) if mutation_all_at_once == "off": if mutation_type == "all": cmds = "module load gcc/gcc-8.2.0\n" \ "VIRUS=$VIRUS\n" \ "PASSAGES=$PASSAGES\n" \ "fits_dir=$fits_dir\n" \ "PARAM='mutation'\n" \ "namesj=('AG' 'UC' 'GA' 'CU' 'AG_adar' 'AG_nonadar')\n"\ "j=$(($[PBS_ARRAY_INDEX-1]%${#namesj[@]}))\n"\ "i=$(($[PBS_ARRAY_INDEX-1-j]/${#namesj[@]}))\n" \ "element=$(ls ${fits_dir}/input/${PASSAGES}/${namesj[j]}| grep 'all'| grep -oh '[0-9]'*)\n"\ "namesi=($element)\n" \ "for element in '${namesi[@]}';do echo '$element';done\n" \ "mkdir ${fits_dir}/output/${PARAM}/${PASSAGES}\n" \ "mkdir ${fits_dir}/output/${PARAM}/${PASSAGES}/${namesj[j]}\n" \ "/sternadi/home/volume1/talzinger/FITS_Analyses/FITS_bin/fits1.3.3 -$PARAM ${fits_dir}/input/${PASSAGES}/" \ "parameters_${PARAM}.txt ${fits_dir}/input/${PASSAGES}/${namesj[j]}/final_allMutations_sorted_" \ "${namesi[i]}.txt ${fits_dir}/output/${PARAM}/${PASSAGES}/${namesj[j]}/posterior_${PARAM}_all_${namesi[i]}.txt " \ "${fits_dir}/output/${PARAM}/${PASSAGES}/${namesj[j]}/summary_${PARAM}_all_${namesi[i]}.txt" if mutation_type == "syn": cmds = "module load gcc/gcc-8.2.0\n" \ "VIRUS=$VIRUS\n" \ "PASSAGES=$PASSAGES\n" \ "fits_dir=$fits_dir\n" \ "PARAM='mutation'\n" \ "namesj=('AG' 'UC' 'GA' 'CU' 'AG_adar' 'AG_nonadar')\n"\ "j=$(($[PBS_ARRAY_INDEX-1]%${#namesj[@]}))\n"\ "i=$(($[PBS_ARRAY_INDEX-1-j]/${#namesj[@]}))\n" \ "element=$(ls ${fits_dir}/input/${PASSAGES}/${namesj[j]}| grep 'syn'| grep -oh '[0-9]'*)\n"\ "namesi=($element)\n" \ "for element in '${namesi[@]}';do echo '$element';done\n" \ "mkdir ${fits_dir}/output/${PARAM}/${PASSAGES}\n" \ "mkdir ${fits_dir}/output/${PARAM}/${PASSAGES}/${namesj[j]}\n" \ "/sternadi/home/volume1/talzinger/FITS_Analyses/FITS_bin/fits1.3.3 -$PARAM ${fits_dir}/input/${PASSAGES}/" \ "parameters_${PARAM}.txt ${fits_dir}/input/${PASSAGES}/${namesj[j]}/final_synMutations_sorted_" \ "${namesi[i]}.txt ${fits_dir}/output/${PARAM}/${PASSAGES}/${namesj[j]}/posterior_${PARAM}_syn_${namesi[i]}.txt " \ "${fits_dir}/output/${PARAM}/${PASSAGES}/${namesj[j]}/summary_${PARAM}_syn_${namesi[i]}.txt" part_lst = [] for mutation in mutation_lst: list = os.listdir(fits_input_dir + mutation) # dir is your directory path number_files = len(list) part_lst.append(number_files) n = max(part_lst) print(n) a1 = 1 d = len(mutation_lst) jnum_tmp = a1 + d * (n - 1) print(jnum_tmp) jnum_quarter = int(np.round(jnum_tmp/4)) jnum1 = "1-%s" % (str(jnum_quarter)) jnum2 = "%s-%s" % (str(jnum_quarter+1), str(2*jnum_quarter)) jnum3 = "%s-%s" % (str((2*jnum_quarter)+1), str(3*jnum_quarter)) jnum4 = "%s-%s" % (str((3*jnum_quarter)+1), str(4*jnum_quarter)) jnum_lst = [jnum1, jnum2, jnum3, jnum4] for jnum in jnum_lst: cmd_file = "/sternadi/home/volume3/okushnir/Cluster_Scripts/new_FITS_jobarray_mutation_%s.cmd" % virus pbs_jobs.create_array_pbs_cmd(cmd_file, jnum, alias="Fits_mutation", gmem=3, cmds=cmds) print("qsub -v VIRUS='%s',PASSAGES='%s', fits_dir='%s' %s" % (virus, passages, fits_dir, cmd_file)) job_id = pbs_jobs.submit("-v VIRUS='%s',PASSAGES='%s, fits_dir=%s' %s" % (virus, passages, fits_dir, cmd_file)) job_id = job_id.split("[")[0] print("Running new_FITS_jobarray_mutation_%s.cmd, job_id:%s" % (virus, job_id)) status = pbs_jobs.check_pbs(job_id) if status == "Done": print("Done: %s" % jnum) # For all position at once else: cmds_all = "module load gcc/gcc-8.2.0\n" \ "VIRUS=$VIRUS\n" \ "PASSAGES=$PASSAGES\n" \ "fits_dir=$fits_dir\n" \ "PARAM='mutation'\n" \ "namesj=('AG' 'UC' 'GA' 'CU' 'AG_adar' 'AG_nonadar')\n" \ "namesi=('syn')\n" \ "j=$(($[PBS_ARRAY_INDEX-1]%${#namesj[@]}))\n" \ "i=$(($[PBS_ARRAY_INDEX-1-j]/${#namesj[@]}))\n"\ "/sternadi/home/volume1/talzinger/FITS_Analyses/FITS_bin/fits1.3.3 -$PARAM " \ "${fits_dir}/input/${PASSAGES}/parameters_$" \ "{PARAM}.txt ${fits_dir}/input/${PASSAGES}/" \ "final_${namesi[i]}Mutations_sorted_${namesj[j]}.txt ${fits_dir}/output/${PARAM}/${PASSAGES}/" \ "posterior_${PARAM}_${namesi[$i]}_${namesj[$j]}.txt " \ "${fits_dir}/output/${PARAM}/${PASSAGES}/summary_${PARAM}_${namesi[$i]}_${namesj[$j]}.txt" cmd_file = "/sternadi/home/volume3/okushnir/Cluster_Scripts/FITS_mutation_%s.cmd" % virus jnum_all = "1-6" pbs_jobs.create_array_pbs_cmd(cmd_file, jnum_all, alias="Fits_mutation", gmem=3, cmds=cmds_all) print("qsub -v VIRUS='%s',PASSAGES='%s, fits_dir=%s' %s" % (virus, passages, fits_dir, cmd_file)) job_id = pbs_jobs.submit("-v VIRUS='%s',PASSAGES='%s, fits_dir=%s' %s" % (virus, passages, fits_dir, cmd_file)) job_id = job_id.split("[")[0] print("Running FITS_mutation_%s.cmd, job_id:%s" % (virus, job_id)) status = pbs_jobs.check_pbs(job_id) if status == "Done": print("Mutation rate AllAtOnce is DONE!") """3. Run fits_mutation_united""" if mutation_all_at_once == "off": if mutation_type == "all": for mutation in mutation_lst: output_mutation_dir = input_dir + "fits/output/mutation/%s/%s/" % (passages, mutation) output_file = output_mutation_dir + "all.txt" print("Creating the mutation conjugated report of: %s" % output_mutation_dir) fits_mutation_united_all.fits_mutation_united_all(output_mutation_dir, output_file) if mutation_type == "syn": for mutation in mutation_lst: output_mutation_dir = input_dir + "fits/output/mutation/%s/%s/" % (passages, mutation) output_file = output_mutation_dir + "all.txt" print("Creating the mutation conjugated report of: %s" % output_mutation_dir) fits_mutation_united_syn.fits_mutation_united_syn(output_mutation_dir, output_file) """4. Run fits_parameters.py""" print("Creating fitness parameters") fits_parameters_pos.fitness_parameters_pos(input_dir=fits_mutation_output_dir, output_dir=fits_input_dir) """5. Run new_FITS_jobarray_fitness.cmd""" try: os.mkdir(input_dir + "fits/output/fitness/") except OSError: print("Creation of the directory %s failed" % (input_dir + "fits/output/fitness/")) else: print("Successfully created the directory %s " % (input_dir + "fits/output/fitness/")) if mutation_type == "all": cmds = "module load gcc/gcc-8.2.0\n" \ "VIRUS=$VIRUS\n" \ "PASSAGES=$PASSAGES\n" \ "fits_dir=$fits_dir\n" \ "PARAM='fitness'\n" \ "namesj=('AG' 'UC' 'GA' 'CU' 'AG_adar' 'AG_nonadar')\n"\ "j=$(($[PBS_ARRAY_INDEX-1]%${#namesj[@]}))\n"\ "i=$(($[PBS_ARRAY_INDEX-1-j]/${#namesj[@]}))\n" \ "element=$(ls ${fits_dir}/input/${PASSAGES}/${namesj[j]}| grep 'all'| grep -oh '[0-9]'*)\n"\ "namesi=($element)\n" \ "for element in '${namesi[@]}';do echo '$element';done\n" \ "mkdir ${fits_dir}/output/${PARAM}/${PASSAGES}\n" \ "mkdir ${fits_dir}/output/${PARAM}/${PASSAGES}/${namesj[j]}\n" \ "/sternadi/home/volume1/talzinger/FITS_Analyses/FITS_bin/fits1.3.3 -$PARAM ${fits_dir}/input/${PASSAGES}/" \ "parameters_${PARAM}_${namesj[j]}.txt ${fits_dir}/input/${PASSAGES}/${namesj[j]}/final_allMutations_sorted_" \ "${namesi[i]}.txt ${fits_dir}/output/${PARAM}/${PASSAGES}/${namesj[j]}/posterior_${PARAM}_all_${namesi[i]}.txt " \ "${fits_dir}/output/${PARAM}/${PASSAGES}/${namesj[j]}/summary_${PARAM}_all_${namesi[i]}.txt" if mutation_type == "syn": cmds = "module load gcc/gcc-8.2.0\n" \ "VIRUS=$VIRUS\n" \ "PASSAGES=$PASSAGES\n" \ "fits_dir=$fits_dir\n" \ "PARAM='fitness'\n" \ "namesj=('AG' 'UC' 'GA' 'CU' 'AG_adar' 'AG_nonadar')\n"\ "j=$(($[PBS_ARRAY_INDEX-1]%${#namesj[@]}))\n"\ "i=$(($[PBS_ARRAY_INDEX-1-j]/${#namesj[@]}))\n" \ "element=$(ls ${fits_dir}/input/${PASSAGES}/${namesj[j]}| grep 'syn'| grep -oh '[0-9]'*)\n"\ "namesi=($element)\n" \ "for element in '${namesi[@]}';do echo '$element';done\n" \ "mkdir ${fits_dir}/output/${PARAM}/${PASSAGES}\n" \ "mkdir ${fits_dir}/output/${PARAM}/${PASSAGES}/${namesj[j]}\n" \ "/sternadi/home/volume1/talzinger/FITS_Analyses/FITS_bin/fits1.3.3 -$PARAM ${fits_dir}/input/${PASSAGES}/" \ "parameters_${PARAM}_${namesj[j]}.txt ${fits_dir}/input/${PASSAGES}/${namesj[j]}/final_synMutations_sorted_" \ "${namesi[i]}.txt ${fits_dir}/output/${PARAM}/${PASSAGES}/${namesj[j]}/posterior_${PARAM}_syn_${namesi[i]}.txt " \ "${fits_dir}/output/${PARAM}/${PASSAGES}/${namesj[j]}/summary_${PARAM}_syn_${namesi[i]}.txt" part_lst = [] for mutation in mutation_lst: list = os.listdir(fits_input_dir + mutation) # dir is your directory path number_files = len(list) part_lst.append(number_files) n = max(part_lst) print(n) a1 = 1 d = len(mutation_lst) jnum_tmp = a1 + d * (n - 1) print(jnum_tmp) jnum_quarter = int(np.round(jnum_tmp/4)) jnum1 = "1-%s" % (str(jnum_quarter)) jnum2 = "%s-%s" % (str(jnum_quarter+1), str(2*jnum_quarter)) jnum3 = "%s-%s" % (str((2*jnum_quarter)+1), str(3*jnum_quarter)) jnum4 = "%s-%s" % (str((3*jnum_quarter)+1), str(4*jnum_quarter)) jnum_lst = [jnum1, jnum2, jnum3, jnum4] for jnum in jnum_lst: cmd_file = "/sternadi/home/volume3/okushnir/Cluster_Scripts/new_FITS_jobarray_fitness_%s.cmd" % virus pbs_jobs.create_array_pbs_cmd(cmd_file, jnum, alias="Fits_fitness", gmem=3, cmds=cmds) print("qsub -v VIRUS='%s',PASSAGES='%s', fits_dir='%s' %s" % (virus, passages, fits_dir, cmd_file)) job_id = pbs_jobs.submit("-v VIRUS='%s',PASSAGES='%s, fits_dir=%s' %s" % (virus, passages, fits_dir, cmd_file)) job_id = job_id.split("[")[0] print("Running new_FITS_jobarray_fitness_%s.cmd, job_id:%s" % (virus, job_id)) status = pbs_jobs.check_pbs(job_id) if status == "Done": print("Done: %s" % jnum) """6. Run fits_fitness_united""" if mutation_type == "all": for mutation in mutation_lst: output_fitness_dir = input_dir + "fits/output/fitness/%s/%s/" % (passages, mutation) output_file = output_fitness_dir + "all.txt" print("Creating the fitness conjugated report of: %s" % output_fitness_dir) fits_fitness_united_all(output_fitness_dir, output_file) if mutation_type == "syn": for mutation in mutation_lst: output_fitness_dir = input_dir + "fits/output/fitness/%s/%s/" % (passages, mutation) output_file = output_fitness_dir + "all.txt" print("Creating the fitness conjugated report of: %s" % output_fitness_dir) fits_fitness_united_syn(output_fitness_dir, output_file)