Exemplos de BA_support.remove_files_with_try em Python

Linguagem de programação: Python

Namespace / nome do pacote: rna_blast_analyze.BR_core

Classe / Tipo: BA_support

Método / Função: remove_files_with_try

Exemplos em hotexamples.com: 3

BA_support.remove_files_with_try em Python - 3 exemplos encontrados. Estes são os exemplos reais mais bem avaliados de rna_blast_analyze.BR_core.BA_support.remove_files_with_try em Python, extraídos de projetos de código aberto. Você pode avaliar os exemplos para nos ajudar a melhorar a qualidade deles.

Métodos Frequentes

Exibir Ocultar

remove_one_file_with_try(9)

Subsequences(4)

non_redundant_seqs(4)

remove_files_with_try(3)

filter_ambiguous_seqs_from_list(3)

sel_seq_simple(2)

rc_hits_2_rna(2)

select_sequences_from_similarity_rec(2)

iter2file_name(2)

get_hit_n(1)

annotate_ambiguos_base(1)

parse_seq_str(1)

generate_random_name(1)

rebuild_structures_output_from_pred(1)

ct2db(1)

blast_in(1)

run_muscle(1)

sanitize_fasta_names_in_seqrec_list(1)

blast_hsps2list(1)

select_analyzed_aligned_hit(1)

parse_one_rec_in_multiline_structure(1)

Métodos Frequentes

remove_one_file_with_try (9)

Subsequences (4)

non_redundant_seqs (4)

remove_files_with_try (3)

filter_ambiguous_seqs_from_list (3)

sel_seq_simple (2)

rc_hits_2_rna (2)

select_sequences_from_similarity_rec (2)

iter2file_name (2)

get_hit_n (1)

Métodos Frequentes

annotate_ambiguos_base (1)

parse_seq_str (1)

generate_random_name (1)

rebuild_structures_output_from_pred (1)

ct2db (1)

blast_in (1)

run_muscle (1)

sanitize_fasta_names_in_seqrec_list (1)

blast_hsps2list (1)

select_analyzed_aligned_hit (1)

parse_one_rec_in_multiline_structure (1)

Métodos Frequentes

parse_one_rec_in_multiline_structure (1)

Exemplo n.º 1

0

Exibir arquivo

def refold_stockholm(stockholm_alig, consensus_structure):
    """
    Run the refold step (refold.pl from the Vienna RNA package) on an alignment.

    The alignment is dumped in clustal format, a stand-in alifold output file
    carrying ``consensus_structure`` is written, and both paths are passed to
    ``compute_refold``. The file it returns is parsed with
    ``BA_support.parse_seq_str`` and all three files are then removed.

    :param stockholm_alig: alignment object exposing ``write_clustal(handle)``
    :param consensus_structure: consensus structure string; its length also
        determines the length of the placeholder sequence line
    :return: list of the records yielded by ``BA_support.parse_seq_str``
    """
    ml.debug(fname())

    # First input of the refold step: the alignment in clustal format.
    clustal_fd, clustal_path = mkstemp(prefix='rba_', suffix='_23', dir=CONFIG.tmpdir)
    with os.fdopen(clustal_fd, 'w') as clustal_handle:
        stockholm_alig.write_clustal(clustal_handle)

    # Second input: a fake alifold output holding the given consensus structure.
    alifold_fd, fake_alifold_path = mkstemp(prefix='rba_', suffix='_24', dir=CONFIG.tmpdir)
    with os.fdopen(alifold_fd, 'w') as alifold_handle:
        # The sequence line is only a placeholder (per the original author it
        # is not used for anything); it just has to match the structure length.
        alifold_handle.write('A' * len(consensus_structure) + '\n')
        alifold_handle.write(consensus_structure + '\n')

    refolded_path = compute_refold(clustal_path, fake_alifold_path)

    # Materialize the records while the handle is still open.
    with open(refolded_path, 'r') as refolded_handle:
        parsed_seqs = list(BA_support.parse_seq_str(refolded_handle))

    # Remove the intermediate files.
    BA_support.remove_files_with_try([clustal_path, fake_alifold_path, refolded_path])

    return parsed_seqs

Exemplo n.º 2

0

Exibir arquivo

Arquivo: infer_homology.py Projeto: ELIXIR-CZ/rboAnalyzer

def run_cmalign_with_scores(fasta_file, cm_file, threads=None):
    """
    Run cmalign on a FASTA file and return the alignment together with its scores.

    A temporary path is reserved and passed to cmalign through ``--sfile``.
    After the run, the alignment is loaded with ``read_st``, the score file
    with ``read_cmalign_sfile``, and both files are removed.

    :param fasta_file: input passed through to ``run_cmalign_on_fasta``
    :param cm_file: covariance model passed through to ``run_cmalign_on_fasta``
    :param threads: if truthy, forwarded to cmalign as ``--cpu``; otherwise
        the ``--cpu`` option is omitted
    :return: tuple ``(cm_msa, cm_align_scores)``; the scores object has its
        index reset to ``0..n-1``
    """
    # Only the path is needed here, so the descriptor is closed right away.
    sfile_fd, scores_path = mkstemp(prefix='rba_', suffix='_29', dir=CONFIG.tmpdir)
    os.close(sfile_fd)

    cmalign_options = (
        '--notrunc --cpu {} --sfile {}'.format(threads, scores_path)
        if threads
        else '--notrunc --sfile {}'.format(scores_path)
    )

    msa_path = run_cmalign_on_fasta(fasta_file, cm_file, cmalign_params=cmalign_options)
    cm_msa = read_st(msa_path)

    # Per-sequence cmalign scores come from the --sfile output.
    # NOTE(review): the original author expects the bit score to be roughly
    # comparable with the BLAST bit score and badly aligned sequences to get
    # a negative bit score -- confirm before relying on it.
    cm_align_scores = read_cmalign_sfile(scores_path)
    cm_align_scores.index = range(len(cm_align_scores.index))

    BA_support.remove_files_with_try([scores_path, msa_path])

    return cm_msa, cm_align_scores

Exemplo n.º 3

0

Exibir arquivo

def _trusted_hits_selection_wrapper(all_hits_, query_, cmscore_tr_, cm_threshold_percent_, len_diff_=0.1):
    """
    Select trusted (homologous) hits and compute their pairwise similarity.

    Hits are selected from ``all_hits_`` by a cm bit-score threshold, the
    query is prepended, exact duplicates are dropped, and the remaining
    sequences are filtered to a length window around the query length. The
    survivors are aligned with muscle and an identity-based similarity table
    (in percent) is computed from the alignment.

    Whenever no usable reference set remains (no hit passes the threshold,
    all selected sequences equal the query, or at most one sequence survives
    the length filter) the function falls back to the query alone.

    :param all_hits_: list of candidate hit sequences
    :param query_: query sequence record; ``query_.annotations['cmstat'].bit_sc``
        is read when ``cm_threshold_percent_`` is given
    :param cmscore_tr_: absolute cm score threshold; must be 0 when
        ``cm_threshold_percent_`` is given
    :param cm_threshold_percent_: threshold as a percentage of the query bit
        score, or None to use ``cmscore_tr_``
    :param len_diff_: tolerated relative length difference to the query
        (strict bounds on both sides)
    :return: tuple ``(dist_table, sequences, msgs)``. On success
        ``dist_table`` is the similarity matrix in percent and ``sequences``
        is the query followed by all threshold-selected hits. On fallback
        ``dist_table`` is an empty array and ``sequences`` is ``[query_]``.
        ``msgs`` collects the status messages emitted along the way.
    """
    ml.debug(fname())
    msgs = []

    def _fall_back_to_query(msg):
        # Record and report the reason, then return the query-only result.
        msgs.append(msg)
        ml.info(msg)
        if ml.level > 20:
            print(msg)
        return np.empty(0), [query_], msgs

    # trusted sequence selection
    # ========================================================
    # Only one thresholding mode may be active at a time.
    assert (cmscore_tr_ == 0) or cm_threshold_percent_ is None
    score = _extract_cmscore_from_hom_seqs(all_hits_)

    if cm_threshold_percent_ is not None:
        selection_threshold = cm_threshold_percent_ * query_.annotations['cmstat'].bit_sc / 100
    else:
        selection_threshold = cmscore_tr_

    pred = infer_hits_cm(score, tr=selection_threshold)
    trusted_seqs_ = [i for i, j in zip(all_hits_, pred) if j]

    if len(trusted_seqs_) == 0:
        return _fall_back_to_query(
            'STATUS: No estimated full-length sequences from BLAST output '
            'selected as reference for structure prediction.\n'
            ' Using query sequence as reference.'
        )

    # add query to trusted sequences
    trusted_seqs_query = [query_] + trusted_seqs_

    # A non-redundant list gives a faster alignment and a better selection.
    nr_trusted_seqs_query = BA_support.non_redundant_seqs(trusted_seqs_query)

    # If only one sequence remains, every selected hit was an exact copy of
    # the query.
    if len(nr_trusted_seqs_query) == 1:
        return _fall_back_to_query(
            'STATUS: All sequences selected as reference are exactly same as query sequence.'
        )

    # Keep only sequences within the tolerated length window around the
    # query; this matters for longish ncRNAs.
    ref_len = len(query_)
    nr_len_selected_trusted = [
        seq for seq in nr_trusted_seqs_query
        if ref_len * (1 - len_diff_) < len(seq) < ref_len * (1 + len_diff_)
    ]

    # Fewer than two sequences cannot be aligned. The bounds are strict, so
    # len_diff_ <= 0 leaves the list empty; that case must also fall back
    # instead of running muscle on an empty FASTA file.
    if len(nr_len_selected_trusted) <= 1:
        return _fall_back_to_query(
            'No sequence satisfy the length difference condition ({}: {}-{})'.format(
                len_diff_,
                ref_len * (1 - len_diff_),
                ref_len * (1 + len_diff_)
            )
        )

    # sanitize seq names (muscle has issues with too long names)
    san_hom_seqs, san_dict = BA_support.sanitize_fasta_names_in_seqrec_list(nr_len_selected_trusted)

    c_fd, trusted_sequence_file_ = mkstemp(prefix='rba_', suffix='_60', dir=CONFIG.tmpdir)
    with os.fdopen(c_fd, 'w') as f:
        SeqIO.write(san_hom_seqs, f, 'fasta')

    align_file = BA_support.run_muscle(trusted_sequence_file_, reorder=True)
    alig = AlignIO.read(align_file, format='clustal')
    distance_calc = DistanceCalculator(model='identity')
    dist_mat = distance_calc.get_distance(alig)

    # rebuild index from sanitized names
    orig_index = [san_dict[i] for i in dist_mat.names]
    dist_mat_pd = pandas.DataFrame.from_records(dist_mat.matrix, index=orig_index)

    # Convert identity distance into similarity in percent.
    dist_table_ = (1 - dist_mat_pd.values) * 100

    BA_support.remove_files_with_try([align_file, trusted_sequence_file_])

    return dist_table_, trusted_seqs_query, msgs