def handle_input(filename):
    """Report the top hit for every query in one HMMER result file.

    Writes a TSV (query, hit id, evalue, bitscore) for queries whose best
    hit passes the inclusion threshold, and returns two IdLists:
    (queries whose top hit was not significant, queries with no hits at all).
    A temporary SQLite index is created next to the input and removed at the end.
    """
    sys.stdout.write("Handling %s\n" % filename)

    insignificant_queries = IdList()
    hitless_queries = IdList()

    file_prefix = FileRoutines.split_filename(filename)[1]
    tmp_index = "%s.tmp.idx" % file_prefix
    # index_db gives random access to queries without loading the whole report.
    report_index = SearchIO.index_db(tmp_index, filename, args.format)

    writing_to_stdout = args.output == "stdout"
    if writing_to_stdout:
        out_fd = sys.stdout
    else:
        out_fd = open("%s%s.top_hits" % (args.top_hits_dir, file_prefix), "w")
    out_fd.write("#query\thit\tevalue\tbitscore\n")

    for query_id in report_index:
        query_result = report_index[query_id]
        if not query_result.hits:
            hitless_queries.append(query_id)
            continue
        top_hit = query_result[0]  # hits are ordered best-first in the report
        if top_hit.is_included:
            out_fd.write("%s\t%s\t%s\t%s\n" % (query_id, top_hit.id,
                                               top_hit.evalue, top_hit.bitscore))
        else:
            insignificant_queries.append(query_id)

    if not writing_to_stdout:
        out_fd.close()
    os.remove(tmp_index)
    return insignificant_queries, hitless_queries
def handle_input(filename):
    """Write every included hit of every query family from one HMMER result file.

    Output is a TSV (query, hit id, evalue, bitscore) sent either to stdout
    or to "<top_hits_dir><prefix>.top_hits". A temporary SQLite index is
    created for random access and removed before returning.
    """
    sys.stdout.write("Handling %s\n" % filename)

    file_prefix = FileRoutines.split_filename(filename)[1]
    tmp_index = "%s.tmp.idx" % file_prefix
    report_index = SearchIO.index_db(tmp_index, filename, args.format)

    use_stdout = args.output == "stdout"
    if use_stdout:
        out_fd = sys.stdout
    else:
        out_fd = open("%s%s.top_hits" % (args.top_hits_dir, file_prefix), "w")
    out_fd.write("#query\thit\tevalue\tbitscore\n")

    for family_id in report_index:
        # Unlike the top-hit-only variant, every hit above threshold is reported.
        for hit in report_index[family_id]:
            if hit.is_included:
                out_fd.write("%s\t%s\t%s\t%s\n" % (family_id, hit.id,
                                                   hit.evalue, hit.bitscore))

    if not use_stdout:
        out_fd.close()
    os.remove(tmp_index)
dest="separator", default="@",
                    help="Separator to use. default - '@'")
# 0-based column indices locating keys and values in the synonym file.
parser.add_argument("-k", "--key_column_index", action="store", dest="key_column_index",
                    type=int, default=0,
                    help="Index of key column in synonym file. Default: 0")
parser.add_argument("-v", "--value_column_index", action="store", dest="value_column_index",
                    type=int, default=1,
                    help="Index of value column in synonym file.Default: 1")

args = parser.parse_args()

# Fall back to the cluster file's basename when no explicit label was supplied.
label = args.label if args.label else FileRoutines.split_filename(args.cluster_file)[1]

SequenceClusterRoutines.label_cluster_elements_from_file(args.cluster_file, label, args.output,
                                                         separator=args.separator,
                                                         label_position=args.label_position,
                                                         key_index=args.key_column_index,
                                                         value_index=args.value_column_index)
type=FileRoutines.check_path,
                    help="Directory to write fam files named by species names")
parser.add_argument("-d", "--syn_file", action="store", dest="syn_file", required=True,
                    help="File with taxa ids and species names")
parser.add_argument("-k", "--key_index", action="store", dest="key_index", type=int, default=0,
                    help="Key column in file with synonyms(0-based). Default: 0")
parser.add_argument("-v", "--value_index", action="store", dest="value_index", type=int, default=1,
                    help="Value column in file with synonyms(0-based). Default: 1")
parser.add_argument("-c", "--comments_prefix", action="store", dest="comments_prefix", default="#",
                    help="Prefix of comments in synonyms file. Default - '#'")
parser.add_argument("-m", "--columns_separator", action="store", dest="separator", default="\t",
                    help="Column separator in file with synonyms")
parser.add_argument("-e", "--header", action="store_true", dest="header", default=False,
                    help="Header is present in synonyms file. Default - False")

args = parser.parse_args()

# Load the taxon-id -> species-name mapping from the synonym file.
syn_dict = SynDict()
syn_dict.read(args.syn_file, header=args.header, separator=args.separator,
              key_index=args.key_index, value_index=args.value_index,
              comments_prefix=args.comments_prefix)

FileRoutines.safe_mkdir(args.output_files_dir)

input_files = os.listdir(args.input_files_dir)
# Copy each input file into the output dir, replacing its basename (a taxon id)
# with the corresponding species name; skip files with no synonym entry.
# NOTE(review): dir paths are concatenated without a separator — presumably both
# *_files_dir arguments end with "/"; verify against their type=check_path handling.
for filename in input_files:
    directory, taxon_id, extension = FileRoutines.split_filename(filename)
    if taxon_id not in syn_dict:
        print("Species name was not found for taxon %s" % taxon_id)
        continue
    shutil.copy("%s%s" % (args.input_files_dir, filename),
                "%s%s%s" % (args.output_files_dir, syn_dict[taxon_id], extension))
parser.add_argument("-m", "--histogram_output", action="store", dest="histogram_output",
                    required=True,
                    help="File to write histogram")

args = parser.parse_args()

# Two-level dict: alignment name -> per-sequence counts of unique positions.
unique_position_dict = TwoLvlDict()

FileRoutines.safe_mkdir(args.output_dir)

for aln_path in args.input:
    aln_name = FileRoutines.split_filename(aln_path)[1]
    per_file_prefix = "%s/%s.unique_positions" % (args.output_dir, aln_name)
    counts = MultipleAlignmentRoutines.count_unique_positions_per_sequence_from_file(aln_path,
                                                                                     per_file_prefix,
                                                                                     format=args.format,
                                                                                     gap_symbol="-",
                                                                                     return_mode="relative",
                                                                                     verbose=False)
    unique_position_dict[aln_name] = counts

species_list = unique_position_dict.sl_keys()
data_dict = OrderedDict()
parser.add_argument("-c", "--hmmer_dir", action="store", dest="hmmer_dir", default="",
                    help="Directory with hmmer v3.1 binaries")
parser.add_argument("-d", "--blast_dir", action="store", dest="blast_dir", default="",
                    help="Directory with BLAST+ binaries")

args = parser.parse_args()

# Basename plus extension of the input, e.g. "transcripts.fasta".
input_filename_list = FileRoutines.split_filename(args.input)
input_filename = input_filename_list[1] + input_filename_list[2]

# Working directory layout matches TransDecoder's "<input>.transdecoder_dir/"
# convention (it holds longest_orfs.pep) — confirm against the TransDecoder step.
workdir_dir = "%s.transdecoder_dir/" % input_filename
pep_from_longest_orfs = "%s/longest_orfs.pep" % workdir_dir

# Separate output trees for the hmmscan-vs-Pfam and blastp-vs-UniRef stages.
hmmscan_dir = "hmmscan_vs_pfam/"
blastp_dir = "blastp_vs_uniref/"
FileRoutines.safe_mkdir(hmmscan_dir)
FileRoutines.safe_mkdir(blastp_dir)

# Subdirectories for the per-chunk fasta splits and their domtblout results.
hmmscan_splited_fasta_dir = "%ssplited_fasta_dir/" % hmmscan_dir
splited_domtblout_dir = "%ssplited_domtblout_dir/" % hmmscan_dir
hmmscan_vs_pfam_prefix = "%s.pfam" % input_filename
type=lambda x: FileRoutines.make_list_of_path_to_files(x.split(",")),
                    help="Comma-separated list of files or directory with files "
                         "containing alignments(one alignment per file)")
parser.add_argument("-n", "--max_gap_number", action="store", dest="max_gap_number",
                    default=0, type=int,
                    help="Maximum number of gaps to retain column")
parser.add_argument("-o", "--output_directory", action="store", dest="output",
                    type=FileRoutines.check_path,
                    help="Output directory")
parser.add_argument("-g", "--gap_symbol", action="store", dest="gap_symbol", default="-",
                    help="Gap symbol")
parser.add_argument("-s", "--suffix", action="store", dest="suffix", default=".gaps_removed",
                    help="Suffix to use in output files. Default: '.gaps_removed'")
parser.add_argument("-f", "--format", action="store", dest="format", default="fasta",
                    help="Format of alignment")
parser.add_argument("-v", "--verbose", action="store_true", dest="verbose",
                    help="Print not found ids. Default - no")

args = parser.parse_args()

FileRoutines.safe_mkdir(args.output)

# For every alignment: drop columns holding more than max_gap_number gap symbols
# and write the result to "<output><name><suffix><ext>" in the same format.
for alignment_file in args.input:
    splited_filename = FileRoutines.split_filename(alignment_file)
    if args.verbose:
        print ("Handling %s ..." % alignment_file)
    output_filename = "%s%s%s%s" % (args.output, splited_filename[1],
                                    args.suffix, splited_filename[2])
    alignment = AlignIO.read(alignment_file, args.format)
    filtered_alignment = MultipleAlignmentRoutines.remove_columns_with_gaps(alignment,
                                                                            args.max_gap_number,
                                                                            gap_symbol=args.gap_symbol)
    AlignIO.write(filtered_alignment, output_filename, args.format)