def _profile_load_files_shared(max_read_size, min_read_no, min_read_size,
                               seq_file_list):
    """
    Load one or more seq files into a single SRNASeq object and build a
    combined name for them.

    :param max_read_size: exclude reads with lengths > max_read_size (int)
    :param min_read_no: exclude reads with counts below min_read_no (int)
    :param min_read_size: exclude reads with lengths < min_read_size (int)
    :param seq_file_list: [path/to/seq/, path/to/seq2,...] (list(str))
    :return: the loaded SRNASeq object, and the per-file output names
        chained together with "_" in list order
    """
    print(colored("-----------------LOADING SEQUENCES----------------",
                  'green'))
    seq = SRNASeq()
    # Exactly one file uses the single-file loader; any other list length
    # is handed to the list loader as a whole.
    if len(seq_file_list) != 1:
        seq.load_seq_file_arg_list(seq_file_list, max_read_size, min_read_no,
                                   min_read_size)
    else:
        seq.load_seq_file(seq_file_list[0], max_read_size, min_read_no,
                          min_read_size)
    file_names = [ah.single_file_output(path) for path in seq_file_list]
    seq_name = file_names[0]
    for extra_name in file_names[1:]:
        seq_name += "_{0}".format(extra_name)
    return seq, seq_name
# NOTE(review): another `_cdp_output` (without the `bok` parameter) is defined
# later in this file; the later definition replaces this one at import time.
def _cdp_output(counts_by_ref, file_fig, file_name, onscreen, no_csv,
                seq_name_1, seq_name_2, ref_file, nt, pub, bok):
    """
    Send CDP results to the plotter and/or the csv writer.

    A plot is requested when either file_fig or onscreen is truthy; a
    file_name of "auto" is first replaced by a generated pdf name. A csv is
    written when no_csv is truthy.

    NOTE(review): writing the csv when ``no_csv`` is truthy reads as inverted
    against the parameter name - presumably the caller passes the negated
    flag; confirm.
    """
    ref_name = ah.single_file_output(ref_file)

    wants_plot = file_fig or onscreen
    if wants_plot:
        plot_name = file_name
        if plot_name == "auto":
            plot_name = ah.cdp_file_output(seq_name_1, seq_name_2, ref_name,
                                           nt, "pdf")
        pr.cdp_plot(counts_by_ref, seq_name_1, seq_name_2, nt, onscreen,
                    file_fig, plot_name, pub, bok)

    if no_csv:
        wtf.cdp_output(counts_by_ref, seq_name_1, seq_name_2,
                       ah.cdp_file_output(seq_name_1, seq_name_2, ref_name,
                                          nt, "csv"))
def _cdp_load_seq_set(seq_file_list, max_read_size, min_read_no,
                      min_read_size):
    """Load every file in seq_file_list into one new SRNASeq object."""
    seq = SRNASeq()
    if len(seq_file_list) == 1:
        seq.load_seq_file(seq_file_list[0], max_read_size, min_read_no,
                          min_read_size)
    else:
        seq.load_seq_file_arg_list(seq_file_list, max_read_size, min_read_no,
                                   min_read_size)
    return seq


def _cdp_joined_seq_name(seq_file_list):
    """Chain the per-file output names of seq_file_list together with "_"."""
    seq_name = ah.single_file_output(seq_file_list[0])
    for seq_file in seq_file_list[1:]:
        seq_name += "_{0}".format(ah.single_file_output(seq_file))
    return seq_name


def _cdp_load_files_shared(max_read_size, min_read_no, min_read_size,
                           seq_file_list_1, seq_file_list_2):
    """
    Shared function for loading the two sets of seq files used in a CDP
    comparison

    :param max_read_size: exclude reads with lengths > max_read_size (int)
    :param min_read_no: exclude reads with counts below min_read_no (int)
    :param min_read_size: exclude reads with lengths < min_read_size (int)
    :param seq_file_list_1: [path/to/seq/, path/to/seq2,...] (list(str))
    :param seq_file_list_2: [path/to/seq/, path/to/seq2,...] (list(str))
    :return: seq_1 (SRNASeq), seq_2 (SRNASeq), seq_name_1 (str),
        seq_name_2 (str)
    """
    print(colored("-----------------LOADING SEQUENCES----------------",
                  'green'))
    # Both sets are loaded before either name is built, matching the order
    # of the previous inline implementation.
    seq_1 = _cdp_load_seq_set(seq_file_list_1, max_read_size, min_read_no,
                              min_read_size)
    seq_2 = _cdp_load_seq_set(seq_file_list_2, max_read_size, min_read_no,
                              min_read_size)
    seq_name_1 = _cdp_joined_seq_name(seq_file_list_1)
    seq_name_2 = _cdp_joined_seq_name(seq_file_list_2)
    return seq_1, seq_2, seq_name_1, seq_name_2
# NOTE(review): another definition of `_load_ref_shared` appears later in this
# file; the later definition replaces this one at import time.
def _load_ref_shared(ref_file):
    """
    Shared function for loading a reference file that must hold a single
    reference sequence

    :param ref_file: path/to/reference (str)
    :return: (ref_output, single_ref) - the output name derived from
        ref_file, and the reference sequence ("" if the file yields no
        headers)
    :raises SystemExit: with status 1 if the file holds more than one
        reference sequence
    """
    ref = RefSeq()
    ref.load_ref_file(ref_file)
    if len(ref) > 1:
        print("\nMultiple reference sequences in file. Exiting.\n")
        # Exit non-zero so calling shells/pipelines see the failure; a bare
        # sys.exit() would report success (status 0).
        sys.exit(1)
    ref_output = ah.single_file_output(ref_file)
    single_ref = ""
    # At most one header remains at this point, so the loop picks up the
    # single sequence (or leaves "" when there are none).
    for header in ref.headers():
        single_ref = ref[header]
    print(colored("------------------ALIGNING READS------------------\n",
                  'green'))
    return ref_output, single_ref
# NOTE(review): this redefines an earlier `_load_ref_shared` in this file;
# being the later definition, this is the one in effect after import.
def _load_ref_shared(ref_file):
    """
    Shared function for loading a reference file that must hold a single
    reference sequence

    :param ref_file: path/to/reference (str)
    :return: (ref_output, single_ref) - the output name derived from
        ref_file, and the reference sequence ("" if the file yields no
        headers)
    :raises SystemExit: with status 1 if the file holds more than one
        reference sequence
    """
    ref = RefSeq()
    ref.load_ref_file(ref_file)
    if len(ref) > 1:
        print("\nMultiple reference sequences in file. Exiting.\n")
        # Exit non-zero so calling shells/pipelines see the failure; a bare
        # sys.exit() would report success (status 0).
        sys.exit(1)
    ref_output = ah.single_file_output(ref_file)
    single_ref = ""
    # At most one header remains at this point, so the loop picks up the
    # single sequence (or leaves "" when there are none).
    for header in ref.headers():
        single_ref = ref[header]
    print(colored("------------------ALIGNING READS------------------\n",
                  'green'))
    return ref_output, single_ref
# NOTE(review): this redefines an earlier `_cdp_output` in this file (that one
# also takes a `bok` parameter); being later, this definition is the one in
# effect after import.
def _cdp_output(counts_by_ref, file_fig, file_name, onscreen, no_csv,
                seq_name_1, seq_name_2, ref_file, nt, pub):
    """
    Send CDP results to the plotter and/or the csv writer.

    A plot is requested when either file_fig or onscreen is truthy; a
    file_name of "auto" is first replaced by a generated pdf name. A csv is
    written when no_csv is truthy.

    NOTE(review): writing the csv when ``no_csv`` is truthy reads as inverted
    against the parameter name - presumably the caller passes the negated
    flag; confirm.
    """
    ref_name = ah.single_file_output(ref_file)

    wants_plot = file_fig or onscreen
    if wants_plot:
        plot_name = file_name
        if plot_name == "auto":
            plot_name = ah.cdp_file_output(seq_name_1, seq_name_2, ref_name,
                                           nt, "pdf")
        pr.cdp_plot(counts_by_ref, seq_name_1, seq_name_2, nt, onscreen,
                    file_fig, plot_name, pub)

    if no_csv:
        wtf.cdp_output(counts_by_ref, seq_name_1, seq_name_2,
                       ah.cdp_file_output(seq_name_1, seq_name_2, ref_name,
                                          nt, "csv"))
def reads_aligned_per_seq(seq_file_list, ref_file, nt, split,
                          min_read_len=18, max_read_len=32, min_read_no=1,
                          processes=4):
    """
    Get RPMR alignments for each sequence file in the list - no plot

    Calculates normalised reads aligned to multiple reference sequences for
    each seq file individually. Outputs a csv only (no scatter plot)

    :param seq_file_list: [path/to/seq/, path/to/seq2,...] (list(str))
    :param ref_file: path/to/reference (str)
    :param nt: read length to align (int)
    :param split: selects the alignment routine - truthy calls
        cdp.cdp_no_split_single, falsy calls cdp.cdp_split_single (bool)
    :param min_read_len: exclude reads with lengths < min_read_len (int)
    :param max_read_len: exclude reads with lengths > max_read_len (int)
    :param min_read_no: exclude reads with counts below min_read_no (int)
    :param processes: no of processes to generate at a time i.e. threads (int)
    """
    print(colored("-----------------LOADING SEQUENCES----------------",
                  'green'))
    loaded_seq_list = []  # list of SRNASeq objects
    loaded_seq_name_list = []  # list of seq names in same order
    # Each file gets its own SRNASeq object (files are not pooled).
    for seq_file in seq_file_list:
        seq = SRNASeq()
        seq.load_seq_file(seq_file, max_read_len, min_read_no, min_read_len)
        loaded_seq_list.append(seq)
        loaded_seq_name_list.append(ah.single_file_output(seq_file))
    # NOTE(review): a truthy `split` selects the *no_split* routine, which
    # reads as inverted against the parameter name - presumably the caller
    # passes a "no split" flag here; confirm before renaming or flipping.
    if split:
        cdp.cdp_no_split_single(loaded_seq_list, loaded_seq_name_list,
                                ref_file, nt, processes)
    else:
        cdp.cdp_split_single(loaded_seq_list, loaded_seq_name_list,
                             ref_file, nt, processes)