Пример #1
0
def merge_vcf_wrap(args):
    """
        Merge the per-individual CNV VCF files listed in ``args.vcfs``.

        Builds the sample sheet from ``args.input_file`` and hands the VCFs,
        the temporary directory, the output directory and the sample sheet to
        ``merge_cnvs.merge_cnvs_indiv``.

        args: parsed command-line namespace; reads ``vcfs``, ``input_file``,
              ``output_directory`` and ``temp_dir``.
    """
    vcf_paths = args.vcfs
    # NOTE(review): the sibling wrappers pass ``args.input_directory`` as the
    # second InputFile argument; confirm ``output_directory`` is intended here.
    sample_sheet = input_file.InputFile(args.input_file,
                                        args.output_directory)
    # The cluster-based variant (merge_cnvs.merge_cnvs_clusters) is currently
    # disabled; only the per-individual merge is run.
    merge_cnvs.merge_cnvs_indiv(
        vcf_paths,
        args.temp_dir,
        args.output_directory,
        sample_sheet,
    )
Пример #2
0
def pilon_align_wrap(args):
    """
        Filter scaffolds, align reads, run Pilon and export the results.

        For every sample in the input file:
          1. filter the scaffolds with ``scaffold.scaffold_filter``,
          2. align the reads to the filtered FASTA (``align.align_reads``),
          3. run ``pilon.run_pilon_contigs`` on the filtered FASTA,
          4. run ``align.simple_bwa`` with ``args.reference_file``,
          5. copy the BWA BAM, its ``.bai`` index, the Pilon FASTA and all
             paired-end read files into ``args.output_directory``,
          6. index the copied Pilon FASTA with ``samtools faidx``,
          7. copy the sample BAM and its index next to the original under
             the ``.pilon.bam`` / ``.pilon.bam.bai`` names.

        args: parsed command-line namespace; reads ``input_file``,
              ``input_directory``, ``temp_dir``, ``output_directory`` and
              ``reference_file``.

        Raises subprocess.CalledProcessError when ``samtools faidx`` exits
        with a non-zero status.
    """
    in_file = input_file.InputFile(args.input_file, args.input_directory)
    # Best-effort directory creation: an already existing directory is fine.
    for directory in (args.temp_dir, args.output_directory):
        try:
            os.mkdir(directory)
        except OSError:
            pass
    # Align reads only if they don't already exist.
    # Then run pilon to fix the reads, removing contigs that are bad.
    # If PE and SE files are present run pilon to fix the contigs, otherwise
    # just filter.
    for sample in in_file:
        temp_fasta = scaffold.scaffold_filter(sample.scaffolds, args.temp_dir,
                                              sample.min_contig_length)
        align.align_reads(sample, args.temp_dir, temp_fasta, skip=True)
        pilon.run_pilon_contigs(sample, args.temp_dir, temp_fasta)
        bwa_output = align.simple_bwa(sample, args.temp_dir,
                                      args.reference_file)
        # Everything below is exported to the output directory under its own
        # base name, in this order.
        exported = [bwa_output, bwa_output + ".bai", sample.pilon_fasta]
        for pairs in sample.paired_end_list:
            exported.append(pairs[0])
            exported.append(pairs[1])
        for source in exported:
            shutil.copy(
                source,
                os.path.join(args.output_directory,
                             os.path.basename(source)))
        exported_fasta = os.path.join(args.output_directory,
                                      os.path.basename(sample.pilon_fasta))
        # Pass an argument list (no shell) so that paths containing spaces or
        # shell metacharacters reach samtools unmodified.
        subprocess.check_call(["samtools", "faidx", exported_fasta])
        # Keep the pilon BAM (and index) under a dedicated name so it stays
        # accessible. The stem is everything before the first ".bam" in the
        # path.
        basename = sample.bam_file.split(".bam")[0]
        shutil.copy(sample.bam_file, basename + ".pilon.bam")
        shutil.copy(sample.bam_file + '.bai', basename + ".pilon.bam.bai")
def genotype_inversions(input_file, input_directory, output_directory, inversions,
                        temp_dir=None, reference_file=None):
    """
        Genotyping inversions using short-read data (unfinished).

        For every sample in the input file the reads are aligned to the
        reference and the expected ``<sample>.inv`` path and the resulting
        BAM path are printed. The genotyping itself is not implemented yet.

        input_file: path of the sample sheet handed to ``InputFile``.
        input_directory: second argument handed to ``InputFile``.
        output_directory: directory in which ``<sample>.inv`` is expected.
        inversions: currently unused.
        temp_dir: working directory passed to ``align.align_reads``.
        reference_file: reference passed to ``align.align_reads``.

        Raises ValueError when ``temp_dir`` or ``reference_file`` is missing.
        The body previously read these from an undefined ``args`` /
        ``reference_file`` name; they are now explicit keyword parameters.
    """
    if temp_dir is None or reference_file is None:
        raise ValueError(
            "genotype_inversions requires both temp_dir and reference_file")
    # The ``input_file`` parameter shadows the module of the same name that
    # the sibling wrappers use, so fetch the module from the global namespace.
    input_file_module = globals()["input_file"]
    in_file = input_file_module.InputFile(input_file, input_directory)
    for sample in in_file:
        inversion_file = os.path.join(output_directory,
                                      sample.samples_name + ".inv")
        print(inversion_file)
        align.align_reads(sample, temp_dir, reference_file,
                          align_to_ref=True, skip=True)
        print(sample.bam_file)
        # TODO: read in the inversion file.

        # TODO: call each inversion using short-read data.

    # NOTE(review): the function always terminates the process with exit
    # status 1, presumably because the genotyping step is not written yet.
    sys.exit(1)
Пример #4
0
def cnv_call_wrap(args):
    """
        Call CNVs for every sample listed in the input file.

        For each sample, ``<input_directory>/<sample>.align_to_ref.sorted.bam``
        and ``<input_directory>/<sample>.fasta`` are handed to
        ``cnv.call_cnvs`` together with ``args.mapping_quality`` and the
        output directory.

        args: parsed command-line namespace; reads ``input_file``,
              ``input_directory``, ``temp_dir``, ``output_directory`` and
              ``mapping_quality``.
    """
    samples = input_file.InputFile(args.input_file, args.input_directory)
    for directory in (args.temp_dir, args.output_directory):
        try:
            os.mkdir(directory)
        except OSError:
            # Creation is best effort; any OSError is ignored.
            pass
    for sample in samples:
        aligned_bam = os.path.join(
            args.input_directory,
            sample.samples_name + ".align_to_ref.sorted.bam",
        )
        sample_fasta = os.path.join(
            args.input_directory,
            sample.samples_name + ".fasta",
        )
        cnv.call_cnvs(
            sample,
            args.mapping_quality,
            aligned_bam,
            args.output_directory,
            sample_fasta,
        )
Пример #5
0
def genotype_cnvs_wrap(args):
    """
        Genotype CNVs and write one VCF file per sample.

        For every sample in the input file the reads are aligned to
        ``args.reference_file``, the FASTA files are indexed, the CNV calls
        in ``<input_directory>/<sample>.cnv`` are loaded and the windowed BAM
        reads are extracted for every retained call. The retained calls are
        then written to ``<output_directory>/<sample>.vcf``.

        Calls on ``chrM`` and calls whose variant type is ``misaligned_sv``
        are skipped in both steps.

        args: parsed command-line namespace; reads ``input_file``,
              ``input_directory``, ``temp_dir``, ``output_directory`` and
              ``reference_file``.
    """

    def is_retained(row):
        # Skip chromosome M for now, and anything flagged as misaligned.
        return row._chrom1 != "chrM" and row._variant_type != "misaligned_sv"

    in_file = input_file.InputFile(args.input_file, args.input_directory)
    # Best-effort directory creation: an already existing directory is fine.
    for directory in (args.temp_dir, args.output_directory):
        try:
            os.mkdir(directory)
        except OSError:
            pass
    reference_file = args.reference_file
    for sample in in_file:
        # Have to align
        break_point_folder = os.path.join(args.output_directory,
                                          sample.samples_name, "breakpoints")
        align.align_reads(sample, args.temp_dir, reference_file, skip=True)
        input_cnvs = os.path.join(args.input_directory,
                                  sample.samples_name + ".cnv")
        # Index and create fastas from fastq.
        fasta.index_fasta(sample, args.temp_dir)
        cnvs = cnv_calls.CNVs(input_cnvs, sample.bam_file, sample.fasta_one,
                              sample.fasta_two, break_point_folder)
        # extract_windowed_bam_reads takes the row index, so iterate by index.
        for i in range(len(cnvs)):
            if is_retained(cnvs.input_rows[i]):
                cnvs.extract_windowed_bam_reads(i)
        # Write VCF file; the context manager guarantees the handle is
        # flushed and closed even if writing a row fails.
        vcf_path = os.path.join(args.output_directory,
                                sample.samples_name + ".vcf")
        with open(vcf_path, "w") as vcf_output:
            vcf.write_vcf_header(sample.samples_name, vcf_output)
            for i in range(len(cnvs)):
                row = cnvs.input_rows[i]
                if is_retained(row):
                    vcf.write_vcf_row(row, vcf_output)