Exemplo n.º 1
0
def get_output_genome_directory_list(option_map, config_map, genome_directory_list):
    """Create the alignment output directories and return the per-genome paths.

    A parent directory named ``sigma_alignments_output`` is created under the
    working directory (if it is not already a directory), and one child
    directory per entry of ``genome_directory_list`` is created beneath it,
    named after the basename of that entry.

    Args:
        option_map: mapping that holds the working directory under the
            module-level ``working_directory_str`` key.
        config_map: mapping that holds the reference genome directory under
            ``ConfigString.reference_genome_directory_str``.
        genome_directory_list: iterable of genome directory paths.

    Returns:
        List of child output directory paths, in input order.
    """
    # The working directory is concatenated as-is, so it is expected to
    # already end with a path separator -- TODO confirm against the caller.
    base_directory = option_map[working_directory_str]

    # Resolved from the config but not used any further in this function;
    # the lookup is kept so a missing config key still fails here.
    reference_genome_directory = utils.set_directory_path(config_map[ConfigString.reference_genome_directory_str])

    parent_genome_directory = base_directory + "sigma_alignments_output"
    if not os.path.isdir(parent_genome_directory):
        check_call(["mkdir", parent_genome_directory], stdout = PIPE, stderr = sys.stderr)

    output_genome_directory_list = []
    for source_directory in genome_directory_list:
        # Mirror the genome's directory name underneath the output parent.
        leaf_name = os.path.basename(source_directory)
        target_directory = utils.set_directory_path(parent_genome_directory) + leaf_name
        output_genome_directory_list.append(target_directory)

        if not os.path.isdir(target_directory):
            check_call(["mkdir", target_directory], stdout = PIPE, stderr = sys.stderr)

    return output_genome_directory_list
Exemplo n.º 2
0
def get_genome_index_base(genome_directory_list):
    """Return the bowtie index base path of every genome directory that has index files.

    For each directory the index base is ``<directory>/<directory basename>``
    (directory prefix produced by ``utils.set_directory_path``). A directory
    contributes its base only when at least one of its files is accepted by
    ``utils.check_bowtie_index_format``.

    Args:
        genome_directory_list: iterable of genome directory paths.

    Returns:
        List of index base paths, in input order.

    If no directory contains an index file, an error message is written to
    stderr and ``utils.die`` is called.
    """
    genome_index_base_list = []

    for genome_directory in genome_directory_list:
        index_name = os.path.basename(genome_directory)
        index_base = utils.set_directory_path(genome_directory) + index_name

        # Count the files in this directory that look like bowtie index files.
        index_file_total = sum(
            1
            for entry in os.listdir(genome_directory)
            if utils.check_bowtie_index_format(entry)
        )

        if index_file_total > 0:
            genome_index_base_list.append(index_base)

    # Nothing usable was found in any of the genome directories.
    if not genome_index_base_list:
        sys.stderr.write("\n** Cannot open bowtie2 index files. Check the Reference_Genome_Directory in config file.\n")
        utils.die("** Program exit!")

    return genome_index_base_list
Exemplo n.º 3
0
def get_bamlog_path(config_path, genome_output_directory):
    """Return the alignment log path for one genome output directory.

    The file is named ``<genome_name>.align.log``, where ``genome_name`` is
    the basename of ``genome_output_directory``, and it lives inside that
    directory (directory prefix produced by ``utils.set_directory_path``).

    Args:
        config_path: accepted for interface compatibility; it does not
            influence the returned path.
        genome_output_directory: output directory of a single genome.

    Returns:
        The log file path as a string.
    """
    genome_name = os.path.basename(genome_output_directory)
    return utils.set_directory_path(genome_output_directory) + genome_name + ".align.log"
Exemplo n.º 4
0
def get_filtered_bamout_path(config_path, genome_output_directory):
    """Return the filtered BAM output path for one genome output directory.

    The file is named ``<genome_name>.filtered.bam``, where ``genome_name`` is
    the basename of ``genome_output_directory``, and it lives inside that
    directory (directory prefix produced by ``utils.set_directory_path``).

    Args:
        config_path: accepted for interface compatibility; it does not
            influence the returned path.
        genome_output_directory: output directory of a single genome.

    Returns:
        The filtered BAM file path as a string.
    """
    genome_name = os.path.basename(genome_output_directory)
    return utils.set_directory_path(genome_output_directory) + genome_name + ".filtered.bam"
Exemplo n.º 5
0
def search_genome_fasta_path(config_map):
    """Collect the genome directories and their FASTA files below the reference directory.

    Every immediate subdirectory of the configured reference genome directory
    is treated as one genome. For each genome, the files accepted by
    ``utils.check_fasta_format`` are gathered as full paths.

    Args:
        config_map: mapping that holds the reference genome directory under
            ``ConfigString.reference_genome_directory_str``.

    Returns:
        Tuple ``(genome_directory_list, genome_fasta_path_list)``; the two
        lists are parallel, and the second holds one list of FASTA paths per
        genome directory.

    If there is no genome subdirectory, or no FASTA file in any of them, an
    error message is written to stderr and ``utils.die`` is called.
    """
    reference_root = utils.set_directory_path(config_map[ConfigString.reference_genome_directory_str])

    genome_directory_list = []
    genome_fasta_path_list = []

    # Only immediate subdirectories count as genomes; plain files are ignored.
    subdirectory_names = [
        entry
        for entry in os.listdir(reference_root)
        if os.path.isdir(os.path.join(reference_root, entry))
    ]

    for subdirectory_name in subdirectory_names:
        genome_directory = reference_root + subdirectory_name
        genome_directory_list.append(genome_directory)

        # Full paths of the FASTA files that belong to this genome.
        fasta_paths = [
            utils.set_directory_path(genome_directory) + candidate
            for candidate in os.listdir(genome_directory)
            if utils.check_fasta_format(candidate)
        ]
        genome_fasta_path_list.append(fasta_paths)

    # No genome subdirectory at all.
    if not genome_directory_list:
        sys.stderr.write("\n** Cannot search genome directory. Check the reference genome directory.\n")
        utils.die("** Program exit!")

    # Genome subdirectories exist, but none of them holds a FASTA file.
    if not any(genome_fasta_path_list):
        sys.stderr.write("\n** Cannot find genome fasta file. Check the reference genome directory.\n")
        utils.die("** Program exit!")

    return (genome_directory_list, genome_fasta_path_list)
Exemplo n.º 6
0
def get_output_genome_directory_list_wo(option_map, config_map):
    """Return the existing per-genome output directories that contain a BAM file.

    The parent output directory is ``<working directory><basename of the
    reference genome directory>``. Each immediate subdirectory of it is
    included in the result when at least one of its files is accepted by
    ``utils.check_bam_format``. A directory is listed once, no matter how
    many BAM files it holds.

    Args:
        option_map: mapping that holds the working directory under the
            module-level ``working_directory_str`` key.
        config_map: mapping that holds the reference genome directory under
            ``ConfigString.reference_genome_directory_str``.

    Returns:
        List of genome output directory paths that contain a BAM file.

    If the parent output directory does not exist, an error message is
    written to stderr and ``utils.die`` is called.
    """
    output_genome_directory_list = []

    # The working directory is concatenated as-is, so it is expected to
    # already end with a path separator -- TODO confirm against the caller.
    working_directory = option_map[working_directory_str]

    # The output parent carries the same name as the reference genome directory.
    reference_genome_directory = utils.set_directory_path(config_map[ConfigString.reference_genome_directory_str])
    parent_genome_directory_name = os.path.basename(os.path.normpath(reference_genome_directory))
    parent_genome_directory = working_directory + parent_genome_directory_name

    # The parent directory must already exist; this function never creates it.
    if not os.path.isdir(parent_genome_directory):
        sys.stderr.write("\n** Cannot open output directory. Check the Reference_Genome_Directory in config file exists in the working directory.\n")
        utils.die("** Program exit!")

    # Only immediate subdirectories are genome output directories.
    dirs = [d for d in os.listdir(parent_genome_directory) if os.path.isdir(os.path.join(parent_genome_directory, d))]
    for genome_name in dirs:
        child_genome_directory = utils.set_directory_path(parent_genome_directory) + genome_name

        # Append each directory at most once, even when it holds several BAM
        # files; appending per file would produce duplicate entries.
        if any(utils.check_bam_format(filename) for filename in os.listdir(child_genome_directory)):
            output_genome_directory_list.append(child_genome_directory)

    return output_genome_directory_list
Exemplo n.º 7
0
def get_output_diff_sequence_path_sublist(output_genome_directory, genome_fasta_path_sublist):
    """Map each genome FASTA path to its SNP report path in the output directory.

    The report for ``<name>.<ext>`` is named ``<name>.SNPs.txt`` and is placed
    in ``output_genome_directory`` (directory prefix produced by
    ``utils.set_directory_path``).

    Args:
        output_genome_directory: output directory of a single genome.
        genome_fasta_path_sublist: iterable of FASTA file paths of that genome.

    Returns:
        List of SNP report paths, in the same order as the input paths.
    """
    return [
        utils.set_directory_path(output_genome_directory)
        + os.path.splitext(os.path.basename(fasta_path))[0]
        + ".SNPs.txt"
        for fasta_path in genome_fasta_path_sublist
    ]