Exemplos de Utility.get_total_seq_from_fasta em Python

Linguagem de programação: Python

Classe / Tipo: Utility

Método / Função: get_total_seq_from_fasta

Exemplos em hotexamples.com: 4

Utility.get_total_seq_from_fasta em Python - 4 exemplos encontrados. Esses são os exemplos do mundo real mais bem avaliados de Utility.get_total_seq_from_fasta do pacote table-linker em Python extraídos de projetos de código aberto. Você pode avaliar os exemplos para nos ajudar a melhorar a qualidade deles.

Métodos Frequentes

Exibir Ocultar

checkToken(11)

hashToken(8)

assertSuccess(6)

getWords(6)

getMyRoot(6)

Cmd(6)

getid(5)

report_error(5)

isArray(4)

hms(4)

get_repo_dir(4)

CheckParameter(4)

get_total_seq_from_fasta(4)

md5(3)

scale_a(3)

load_categories(3)

compare_all_despite_starvation(3)

add_to_mark(3)

getMyApp(2)

getACT(2)

fileName(2)

iterate_minibatches(2)

covarianceCalculator(2)

getSuboptimality(2)

get_seq_dict(2)

getVideoFile(2)

getCenter(2)

getMinute(2)

makeRandomToken(2)

ConvertPostion(2)

toast(2)

CodeUnit(2)

search_key_of_value_part_match(2)

scale_height(2)

Consensus(2)

load_vocabulary(2)

addToHistogram(2)

rot_center(1)

randomTrainingIndice(1)

get_sitelist_unambig_aa(1)

get_str_with_index(1)

union(1)

switch_led_info(1)

get_total_nongap_nuc_by_pos(1)

sound(1)

sortDictionary(1)

selectCodingOption(1)

load_dict_with_images(1)

search_key_of_value(1)

length_distribution_on_suffix(1)

Métodos Frequentes

checkToken (11)

hashToken (8)

assertSuccess (6)

getWords (6)

getMyRoot (6)

Cmd (6)

getid (5)

report_error (5)

isArray (4)

hms (4)

Métodos Frequentes

get_repo_dir (4)

CheckParameter (4)

get_total_seq_from_fasta (4)

md5 (3)

scale_a (3)

load_categories (3)

compare_all_despite_starvation (3)

add_to_mark (3)

getMyApp (2)

getACT (2)

fileName (2)

iterate_minibatches (2)

covarianceCalculator (2)

getSuboptimality (2)

get_seq_dict (2)

getVideoFile (2)

getCenter (2)

getMinute (2)

makeRandomToken (2)

ConvertPostion (2)

Métodos Frequentes

fileName (2)

iterate_minibatches (2)

covarianceCalculator (2)

getSuboptimality (2)

get_seq_dict (2)

getVideoFile (2)

getCenter (2)

getMinute (2)

makeRandomToken (2)

ConvertPostion (2)

toast (2)

CodeUnit (2)

search_key_of_value_part_match (2)

scale_height (2)

Consensus (2)

load_vocabulary (2)

addToHistogram (2)

rot_center (1)

randomTrainingIndice (1)

get_sitelist_unambig_aa (1)

get_str_with_index (1)

union (1)

switch_led_info (1)

get_total_nongap_nuc_by_pos (1)

sound (1)

sortDictionary (1)

selectCodingOption (1)

load_dict_with_images (1)

search_key_of_value (1)

length_distribution_on_suffix (1)

Métodos Frequentes

toast (2)

CodeUnit (2)

search_key_of_value_part_match (2)

scale_height (2)

Consensus (2)

load_vocabulary (2)

addToHistogram (2)

rot_center (1)

randomTrainingIndice (1)

get_sitelist_unambig_aa (1)

get_str_with_index (1)

union (1)

switch_led_info (1)

get_total_nongap_nuc_by_pos (1)

sound (1)

sortDictionary (1)

selectCodingOption (1)

load_dict_with_images (1)

search_key_of_value (1)

length_distribution_on_suffix (1)

id_generator (1)

intersect (1)

report_warning (1)

process_image_dsift (1)

loadConfig (1)

isAscending (1)

safeGetDirectory (1)

isDescending (1)

list2dic (1)

launch_fatal_process_alert (1)

sample_utility (1)

AABB3 (1)

get_seq2len (1)

Completer (1)

all_chars (1)

DataSegment (1)

Data (1)

CubicTimeScaling (1)

ConvertPostion2 (1)

Contrast_and_Bright (1)

Exemplo n.º 1

0

Exibir arquivo

Arquivo: Test_sam_handler.py Projeto: cfe-lab/Umberjack

def test_create_msa_slice_from_sam_pair_selection(self):
    """
    Check that sam_handler.create_msa_slice_from_sam() iterates through the sam records
    and selects the correct records for pairing.

    Scenarios listed by the original author:
    - Missing mates
    - Unmapped mates
    - Mates mapped to wrong ref
    - Pair mapped to wrong ref
    - Low map qual threshold
    - Secondary, chimeric alignments

    CIGAR handling (H, S, X, =, M, P) should be tested in sam_record.
    """
    # The output fasta lands in the test directory, named after the input sam file.
    sam_basename = os.path.basename(TEST_PAIR_SELECTION_SAM)
    actual_msa_fasta = os.sep.join([TEST_DIR, sam_basename.replace(".sam", ".msa.fasta")])

    # Only pair selection matters here: slices, breadth thresholds, N's and stop codon
    # masking are all switched off.  Mapping quality and the target reference still apply.
    slice_options = dict(
        sam_filename=TEST_PAIR_SELECTION_SAM,
        ref=TEST_PAIR_SELECTION_TARGET_REF,
        out_fasta_filename=actual_msa_fasta,
        mapping_cutoff=MAPQ_CUTOFF,
        read_qual_cutoff=READ_QUAL_CUTOFF,
        max_prop_N=1.0,
        breadth_thresh=0,
        start_pos=0,
        end_pos=0,
        do_insert_wrt_ref=False,
        do_mask_stop_codon=False,
    )
    actual_written = sam.sam_handler.create_msa_slice_from_sam(**slice_options)

    # The fasta must have been produced and must not be empty.
    fasta_has_content = os.path.exists(actual_msa_fasta) and os.path.getsize(actual_msa_fasta) > 0
    self.assertTrue(fasta_has_content, actual_msa_fasta + " doesn't exist or is empty")

    # Its contents must match the expected fasta.
    diff_line = TestSamHandler.diff_fasta_line(EXPECTED_TEST_PAIR_SELECTION_FULL_MSA_FASTA, actual_msa_fasta)
    mismatch_msg = ("Expected full msa fasta " + EXPECTED_TEST_PAIR_SELECTION_FULL_MSA_FASTA +
                    " different than " + actual_msa_fasta + ":\n" + str(diff_line))
    self.assertIsNone(diff_line, mismatch_msg)

    # The returned count must equal the number of sequences in the expected fasta.
    expected_written = Utility.get_total_seq_from_fasta(EXPECTED_TEST_PAIR_SELECTION_FULL_MSA_FASTA)
    count_msg = "Expect total written seq {} but got {} from {}".format(
        expected_written, actual_written, actual_msa_fasta)
    self.assertEqual(expected_written, actual_written, count_msg)

Exemplo n.º 2

0

Exibir arquivo

Arquivo: slice_miseq.py Projeto: cfe-lab/Umberjack

def tabulate_rates(fasttree_output_dir, output_csv_filename, comments):
    """
    Collect the GTR model rates from every fasttree log under a directory and write
    them to a csv file, one row per (fasttree log, mutation) combination.

    ASSUMES that the multiple sequence aligned fasta for each log sits in the same
    folder as the log, named like the log but ending in ".fasta" instead of
    ".fasttree.log", i.e. <sample_id>.<ref>.msa.<window_start>_<window_end>.fasta

    :param str fasttree_output_dir: directory searched recursively for *.fasttree.log files
    :param str output_csv_filename: path of the csv file to write (overwritten if present)
    :param str comments: text written after a leading "#" as the first line of the csv
    :return: None
    """
    # Kept function-local, as in the original, so this block does not depend on
    # the module-level imports.
    import fnmatch

    # Example log path: .../out/RunABC/HIV1B-nef/ABC_S89.HIV1B-nef.msa.1_300.fasttree.log
    with open(output_csv_filename, 'w') as fh_out:
        fh_out.write("#" + comments + "\n")
        fh_out.write("ID,Ref,Window_Start,Window_End,Window_Reads,Non_Gap_Window_Start,Mutation,Rate\n")

        for root, _dirs, filenames in os.walk(fasttree_output_dir):
            for fasttree_log in fnmatch.filter(filenames, '*.fasttree.log'):
                fullpath_fasttree_log = os.path.join(root, fasttree_log)
                AC, AG, AT, CG, CT, GT = fasttree.extract_gtr_rates(fullpath_fasttree_log)
                rates = {"AC": AC, "AG": AG, "AT": AT, "CG": CG, "CT": CT, "GT": GT}

                msa_slice_fasta_filename = fullpath_fasttree_log.replace(".fasttree.log", ".fasta")

                # Filename layout: sample_id.ref.msa.window_start_window_end.fasta
                # Fields are picked from the right so that dots inside sample_id are tolerated.
                name_split = os.path.basename(msa_slice_fasta_filename).split(".")
                window = name_split[-2]
                ref = name_split[-4]  # TODO: what if reference has . in it?
                sample_id = ".".join(name_split[0:-4])
                window_start, window_end = window.split("_")

                nongap_window_start = Utility.get_total_nongap_nuc_by_pos(msa_slice_fasta_filename, 0)
                reads = Utility.get_total_seq_from_fasta(msa_slice_fasta_filename)

                # dict.items() is available on both Python 2 and 3;
                # dict.iteritems() no longer exists on Python 3.
                for mutation, rate in rates.items():
                    fh_out.write(",".join([sample_id, ref, window_start, window_end, str(reads),
                                           str(nongap_window_start), mutation, str(rate)]) + "\n")

Exemplo n.º 3

0

Exibir arquivo

Arquivo: Test_sam_handler.py Projeto: cfe-lab/Umberjack

def test_create_msa_slice_from_sam_dup(self):
    """
    Check that sam_handler.create_msa_slice_from_sam() iterates through non-duplicate
    records and selects the correct records for pairing.
    """
    # Both outputs land in the test directory, named after the input sam file.
    sam_basename = os.path.basename(TEST_PAIR_SELECTION_REMDUP_SAM)
    actual_msa_fasta = os.sep.join([TEST_DIR, sam_basename.replace(".sam", ".msa.fasta")])
    actual_dup_tsv = os.sep.join([TEST_DIR, sam_basename.replace(".sam", ".tsv")])

    # Only pair selection matters here: slices, breadth thresholds, N's and stop codon
    # masking are all switched off.  Mapping quality and the target reference still apply.
    # NOTE(review): the create_msa_slice_from_sam signature shown elsewhere in this file
    # does not declare out_dup_tsv_filename -- confirm which revision this test targets.
    slice_options = dict(
        sam_filename=TEST_PAIR_SELECTION_REMDUP_SAM,
        ref=TEST_PAIR_SELECTION_TARGET_REF,
        out_fasta_filename=actual_msa_fasta,
        mapping_cutoff=MAPQ_CUTOFF,
        read_qual_cutoff=READ_QUAL_CUTOFF,
        max_prop_N=1.0,
        breadth_thresh=0,
        start_pos=0,
        end_pos=0,
        do_insert_wrt_ref=False,
        do_mask_stop_codon=False,
        do_remove_dup=True,
        out_dup_tsv_filename=actual_dup_tsv,
    )
    actual_written = sam.sam_handler.create_msa_slice_from_sam(**slice_options)

    # Both the duplicates tsv and the fasta must exist and be non-empty.
    tsv_has_content = os.path.exists(actual_dup_tsv) and os.path.getsize(actual_dup_tsv) > 0
    self.assertTrue(tsv_has_content, actual_dup_tsv + " doesn't exist or is empty")

    fasta_has_content = os.path.exists(actual_msa_fasta) and os.path.getsize(actual_msa_fasta) > 0
    self.assertTrue(fasta_has_content, actual_msa_fasta + " doesn't exist or is empty")

    # The fasta contents must match the expected fasta.
    diff_line = TestSamHandler.diff_fasta_line(EXPECTED_TEST_PAIR_SELECTION_REMDUP_FULL_MSA_FASTA,
                                               actual_msa_fasta)
    mismatch_msg = ("Expected full msa fasta " + EXPECTED_TEST_PAIR_SELECTION_REMDUP_FULL_MSA_FASTA +
                    " different than " + actual_msa_fasta + ":\n" + str(diff_line))
    self.assertIsNone(diff_line, mismatch_msg)

    # The returned count must equal the number of sequences in the expected fasta.
    expected_written = Utility.get_total_seq_from_fasta(EXPECTED_TEST_PAIR_SELECTION_REMDUP_FULL_MSA_FASTA)
    count_msg = "Expect total written seq {} but got {} from {}".format(
        expected_written, actual_written, actual_msa_fasta)
    self.assertEqual(expected_written, actual_written, count_msg)

Exemplo n.º 4

0

Exibir arquivo

Arquivo: sam_handler.py Projeto: cfe-lab/Umberjack

def create_msa_slice_from_sam(sam_filename, ref, out_fasta_filename, mapping_cutoff, read_qual_cutoff, max_prop_N,
                              breadth_thresh, start_pos=0, end_pos=0, do_insert_wrt_ref=False,
                              do_mask_stop_codon=False, do_remove_dup=False, ref_len=0):
    """
    Parse SAM file contents for sequences aligned to a reference.
    Extracts the portion of the read that fits into the desired slice of the genome.
    For paired-end reads, merges the mates into a single sequence with gaps with respect to the reference.
    Creates a multiple sequence alignment (MSA) for the desired slice.
    Left and right pads the reads according to the positions within the slice.
    Writes the MSA sequences to out_fasta_filename.
    Converts query names so that they are compatible with Newick format in phylogenetic reconstruction
    by converting colons, semicolons, parentheses to underscores.

    If out_fasta_filename already exists and is non-empty, nothing is regenerated: a warning
    is logged and the number of sequences already in that file is returned.

    NB: Sam file must be query sorted.
    NB: Only takes the primary alignment.

    :param str sam_filename: full path to sam file.  Must be queryname sorted.
    :param str ref: name of reference contig to form MSA alignments to.
        If None, then splits out all alignments to any reference that fit within the desired slice positions.
        Setting the ref to None is only useful when the reads are aligned to a set of multiple sequence aligned
        reference contigs, and you don't care which reference the read hits, just that it fits in the slice.
    :param str out_fasta_filename: full path to output multiple sequence aligned fasta file for the sequences
        in the slice.
    :param int mapping_cutoff: Ignore alignments with mapping quality lower than the cutoff.
    :param int read_qual_cutoff: Convert bases with quality lower than this cutoff to N unless both mates agree.
    :param float max_prop_N: Do not output reads with proportion of N higher than the cutoff.
        Only counts the bases within the slice.  This only makes a difference if the slice is expected to be
        much wider than the (merged) read length.
    :param float breadth_thresh: Fraction of the slice that the read must cover with actual bases A, C, G, T.
        Reads below this threshold are excluded from output.
    :param int start_pos: 1-based start nucleotide position of slice.  If 0, then uses beginning of ref.
    :param int end_pos: 1-based end nucleotide position of slice.  If 0, then uses end of ref.
    :param bool do_insert_wrt_ref: whether to exclude insertions to the reference.
        If include insertions, then the insertions will be multiple sequence aligned further by MAFFT.
    :param bool do_mask_stop_codon: whether to mask stop codons with "NNN".
        Most useful when you want to do codon analysis afterwards, as many codon models do not allow stop codons.
        Assumes that the reference starts at the beginning of a codon.
    :param bool do_remove_dup: whether or not to exclude duplicate sequence.  Only the merged read with the
        highest sum of quality scores of aligned bases will be written to fasta if it is duplicated.
        To be considered a duplicate the sequence must have same start coordinate with respect to reference
        and matching bases, gaps, N's.
    :param int ref_len: length of reference.  If 0, then takes length from sam headers.
        Only forwarded to record_iter, i.e. only used when do_remove_dup is False.
    :returns int: total sequences written to multiple sequence aligned fasta
    :raises: :py:class:`exceptions.ValueError` if sam file is not queryname sorted according to the sam header
    """
    LOGGER.debug("About to slice fasta " + out_fasta_filename + " from " + sam_filename)

    # Reuse a previous non-empty result instead of re-slicing.
    if os.path.exists(out_fasta_filename) and os.path.getsize(out_fasta_filename):
        # Logger.warning replaces the deprecated Logger.warn alias (removed in Python 3.13).
        LOGGER.warning("Found existing Sliced MSA-Fasta " + out_fasta_filename + ". Not regenerating.")
        total_seq = Utility.get_total_seq_from_fasta(out_fasta_filename)
        LOGGER.debug("Done slice fasta " + out_fasta_filename)
        return total_seq

    total_written = 0
    with open(out_fasta_filename, 'w') as out_fasta_fh:
        if do_remove_dup:
            pair_iter = uniq_record_iter(sam_filename=sam_filename, ref=ref, mapping_cutoff=mapping_cutoff,
                                         read_qual_cutoff=read_qual_cutoff, is_insert=do_insert_wrt_ref)
        else:
            pair_iter = record_iter(sam_filename=sam_filename, ref=ref, mapping_cutoff=mapping_cutoff,
                                    ref_len=ref_len)

        for pair in pair_iter:
            # Only the merged sequence is needed here; quality and stats are discarded.
            mseq, _mqual, _stats = pair.get_seq_qual(do_pad_wrt_ref=False, do_pad_wrt_slice=True,
                                                     q_cutoff=read_qual_cutoff,
                                                     slice_start_wrt_ref_1based=start_pos,
                                                     slice_end_wrt_ref_1based=end_pos,
                                                     do_insert_wrt_ref=do_insert_wrt_ref,
                                                     do_mask_stop_codon=do_mask_stop_codon)
            # __write_seq receives the max_prop_N / breadth_thresh filters and reports whether it wrote.
            is_written = __write_seq(out_fasta_fh, pair.get_name(), mseq, max_prop_N, breadth_thresh)
            if is_written:
                total_written += 1

    LOGGER.debug("Done slice fasta " + out_fasta_filename)
    return total_written