示例#1
0
def handle_input(filename):
    """Extract the top hit for every query in a HMMER search output file.

    Writes tab-separated lines (query, hit id, evalue, bitscore) to stdout
    or to a per-file ".top_hits" report, depending on ``args.output``.

    Relies on the module-level ``args`` (format, output, top_hits_dir) and
    on the FileRoutines / SearchIO / IdList helpers imported elsewhere in
    the file.

    Returns:
        tuple: (not_significant_ids, not_found_ids) — queries whose best
        hit was not included as significant, and queries with no hits.
    """
    sys.stdout.write("Handling %s\n" % filename)
    not_significant_ids = IdList()
    not_found_ids = IdList()

    prefix = FileRoutines.split_filename(filename)[1]
    index_file = "%s.tmp.idx" % prefix
    hmm_dict = SearchIO.index_db(index_file, filename, args.format)
    if args.output == "stdout":
        out_fd = sys.stdout
    else:
        out_fd = open("%s%s.top_hits" % (args.top_hits_dir, prefix), "w")
        out_fd.write("#query\thit\tevalue\tbitscore\n")

    try:
        for query in hmm_dict:
            # Hoist the repeated hmm_dict[query] lookups: index_db is
            # SQLite-backed, so each subscript re-reads from the index.
            result = hmm_dict[query]
            if result.hits:
                top_hit = result[0]
                if top_hit.is_included:
                    out_fd.write(
                        "%s\t%s\t%s\t%s\n" %
                        (query, top_hit.id, top_hit.evalue,
                         top_hit.bitscore))
                else:
                    not_significant_ids.append(query)
            else:
                not_found_ids.append(query)
    finally:
        # Always release the per-file report and drop the temporary index,
        # even if iteration fails part-way through.
        if args.output != "stdout":
            out_fd.close()
        os.remove(index_file)
    return not_significant_ids, not_found_ids
示例#2
0
def handle_input(filename):
    """Report every included hit from a HMMER search output file.

    For each family in the indexed search results, writes a tab-separated
    line (family, hit id, evalue, bitscore) either to stdout or to a
    per-file ".top_hits" report, depending on ``args.output``.
    """
    sys.stdout.write("Handling %s\n" % filename)
    prefix = FileRoutines.split_filename(filename)[1]
    index_file = "%s.tmp.idx" % prefix
    hmm_dict = SearchIO.index_db(index_file, filename, args.format)

    use_stdout = args.output == "stdout"
    if use_stdout:
        out_fd = sys.stdout
    else:
        out_fd = open("%s%s.top_hits" % (args.top_hits_dir, prefix), "w")
        out_fd.write("#query\thit\tevalue\tbitscore\n")

    for family in hmm_dict:
        # Only hits flagged as included by the inclusion threshold.
        included_hits = (hit for hit in hmm_dict[family] if hit.is_included)
        for hit in included_hits:
            out_fd.write("%s\t%s\t%s\t%s\n" %
                         (family, hit.id, hit.evalue, hit.bitscore))

    if not use_stdout:
        out_fd.close()

    os.remove(index_file)
示例#3
0
                    dest="separator",
                    default="@",
                    help="Separator to use. default - '@'")
parser.add_argument("-k", "--key_column_index", action="store",
                    dest="key_column_index", type=int, default=0,
                    help="Index of key column in synonym file. Default: 0")
parser.add_argument("-v", "--value_column_index", action="store",
                    dest="value_column_index", type=int, default=1,
                    help="Index of value column in synonym file.Default: 1")
args = parser.parse_args()

# Fall back to the cluster file's basename when no explicit label is given.
label = args.label or FileRoutines.split_filename(args.cluster_file)[1]

SequenceClusterRoutines.label_cluster_elements_from_file(
    args.cluster_file, label, args.output,
    separator=args.separator,
    label_position=args.label_position,
    key_index=args.key_column_index,
    value_index=args.value_column_index)
示例#4
0
                    type=FileRoutines.check_path,
                    help="Directory to write fam files named by species names")
parser.add_argument("-d", "--syn_file", action="store", dest="syn_file",
                    required=True,
                    help="File with taxa ids and species names")
parser.add_argument("-k", "--key_index", action="store", dest="key_index",
                    type=int, default=0,
                    help="Key column in file with synonyms(0-based). Default: 0")
parser.add_argument("-v", "--value_index", action="store", dest="value_index",
                    type=int, default=1,
                    help="Value column in file with synonyms(0-based). Default: 1")
parser.add_argument("-c", "--comments_prefix", action="store",
                    dest="comments_prefix", default="#",
                    help="Prefix of comments in synonyms file. Default - '#'")
parser.add_argument("-m", "--columns_separator", action="store",
                    dest="separator", default="\t",
                    help="Column separator in file with synonyms")
parser.add_argument("-e", "--header", action="store_true", dest="header",
                    default=False,
                    help="Header is present in synonyms file. Default - False")

args = parser.parse_args()

# Map taxon ids to species names from the synonym table.
syn_dict = SynDict()
syn_dict.read(args.syn_file, header=args.header, separator=args.separator,
              key_index=args.key_index, value_index=args.value_index,
              comments_prefix=args.comments_prefix)

FileRoutines.safe_mkdir(args.output_files_dir)
input_files = os.listdir(args.input_files_dir)
for filename in input_files:
    directory, taxon_id, extension = FileRoutines.split_filename(filename)
    if taxon_id in syn_dict:
        # Copy the fam file under its species name, keeping the extension.
        shutil.copy("%s%s" % (args.input_files_dir, filename),
                    "%s%s%s" % (args.output_files_dir, syn_dict[taxon_id],
                                extension))
    else:
        print("Species name was not found for taxon %s" % taxon_id)
示例#5
0
parser.add_argument("-m", "--histogram_output", action="store",
                    dest="histogram_output", required=True,
                    help="File to write histogram")

args = parser.parse_args()

unique_position_dict = TwoLvlDict()

FileRoutines.safe_mkdir(args.output_dir)

# Count unique alignment positions per sequence for every input alignment,
# keyed by the alignment file's basename.
for alignment_file in args.input:
    aln_name = FileRoutines.split_filename(alignment_file)[1]
    output_prefix = "%s/%s.unique_positions" % (args.output_dir, aln_name)

    counts = MultipleAlignmentRoutines.count_unique_positions_per_sequence_from_file(
        alignment_file, output_prefix, format=args.format, gap_symbol="-",
        return_mode="relative", verbose=False)
    unique_position_dict[aln_name] = counts

species_list = unique_position_dict.sl_keys()

data_dict = OrderedDict()
示例#6
0
parser.add_argument("-c", "--hmmer_dir", action="store", dest="hmmer_dir",
                    default="", help="Directory with hmmer v3.1 binaries")
parser.add_argument("-d", "--blast_dir", action="store", dest="blast_dir",
                    default="", help="Directory with BLAST+ binaries")

args = parser.parse_args()

# Reassemble basename + extension of the input file for naming work dirs.
input_filename_list = FileRoutines.split_filename(args.input)
input_filename = "".join(input_filename_list[1:3])

workdir_dir = "%s.transdecoder_dir/" % input_filename
pep_from_longest_orfs = "%s/longest_orfs.pep" % workdir_dir

hmmscan_dir = "hmmscan_vs_pfam/"
blastp_dir = "blastp_vs_uniref/"

for work_directory in (hmmscan_dir, blastp_dir):
    FileRoutines.safe_mkdir(work_directory)

hmmscan_splited_fasta_dir = "%ssplited_fasta_dir/" % hmmscan_dir
splited_domtblout_dir = "%ssplited_domtblout_dir/" % hmmscan_dir

hmmscan_vs_pfam_prefix = "%s.pfam" % input_filename
示例#7
0
                    type=lambda x: FileRoutines.make_list_of_path_to_files(x.split(",")),
                    help="Comma-separated list of files or directory with files "
                         "containing alignments(one alignment per file)")
parser.add_argument("-n", "--max_gap_number", action="store", dest="max_gap_number", default=0, type=int,
                    help="Maximum number of gaps to retain column")
parser.add_argument("-o", "--output_directory", action="store", dest="output", type=FileRoutines.check_path,
                    help="Output directory")
parser.add_argument("-g", "--gap_symbol", action="store", dest="gap_symbol", default="-",
                    help="Gap symbol")
parser.add_argument("-s", "--suffix", action="store", dest="suffix", default=".gaps_removed",
                    help="Suffix to use in output files. Default: '.gaps_removed'")
parser.add_argument("-f", "--format", action="store", dest="format", default="fasta",
                    help="Format of alignment")
# Fixed help text: the verbose flag prints the file being handled, not
# "not found ids" (help was copied from an unrelated script).
parser.add_argument("-v", "--verbose", action="store_true", dest="verbose",
                    help="Print name of file being handled. Default - no")

args = parser.parse_args()

FileRoutines.safe_mkdir(args.output)

# Strip gap-rich columns from every input alignment and write the result
# to the output directory under the original name plus the suffix.
for alignment_file in args.input:
    splited_filename = FileRoutines.split_filename(alignment_file)
    if args.verbose:
        print("Handling %s ..." % alignment_file)
    output_filename = "%s%s%s%s" % (args.output, splited_filename[1], args.suffix, splited_filename[2])
    alignment = AlignIO.read(alignment_file, args.format)
    filtered_alignment = MultipleAlignmentRoutines.remove_columns_with_gaps(alignment, args.max_gap_number,
                                                                            gap_symbol=args.gap_symbol)
    AlignIO.write(filtered_alignment, output_filename, args.format)