Exemplo n.º 1
    def __call__(self):
        """
        Launch the complete analysis pipeline:

        * Reference importation/parsing
        * Optional reference masking to remove homologies between reference sequences
        * Optional fastq quality filtering / adapter trimming
        * Optional reference indexing for bwa from merged references
        * Short read alignment with bwa mem
        * Splitting of the sam to attribute reads to each original reference (or unmapped)
        * Output per reference bam, sam, bedgraph, bed, covgraph, variant call
        * Output distribution table and graph

        The method reads its configuration from instance attributes (outdir, ref_masking,
        bwa_index, quality_filtering, adapter_trimming, R1, R2, outprefix) and stores the
        directories it creates and the resulting sam path back on the instance.
        """
        stime = time()
        self.outdir = mkdir(path.abspath(self.outdir))

        print("\n##### PARSE REFERENCES #####\n")
        # Dedicated reference and index directories are only needed when references are
        # going to be rewritten (masking) or when no ready-made bwa index was supplied.
        # Otherwise both are left empty.
        if self.ref_masking or not self.bwa_index:
            self.ref_dir = mkdir(path.join(self.outdir, "references/"))
            self.index_dir = mkdir(path.join(self.outdir, "bwa_index/"))
        else:
            self.ref_dir = ""
            self.index_dir = ""

        # Reference Masking
        if self.ref_masking:
            print("\n##### REFERENCE HOMOLOGIES MASKING #####\n")
            self.db_dir = mkdir(path.join(self.outdir, "blast_db/"))
            # NOTE(review): the value returned by _iterative_masker() is not used by any
            # statement visible in this method - confirm nothing downstream needs it.
            self._iterative_masker()
            # Erase the existing index value if ref masking was performed: an index built
            # from the unmasked references must not be reused. This has to be written to
            # the instance attribute, a local variable would be silently discarded.
            self.bwa_index = None

        # Fastq Filtering
        if self.quality_filtering or self.adapter_trimming:
            print("\n##### FASTQ FILTERING #####\n")
            self.fastq_dir = mkdir(path.join(self.outdir, "fastq/"))
            self.R1, self.R2 = self._fastq_filter()

        # BWA alignment
        print("\n##### READ REFERENCES AND ALIGN WITH BWA #####\n")
        # An index will be generated if no index was provided
        self.result_dir = mkdir(path.join(self.outdir, "results/"))

        # Forward the paired reads, the (possibly erased) index and the directories created
        # above, so that the outputs land inside self.outdir.
        # NOTE(review): keyword names follow the parameter list documented for Mem.align;
        # no reference fasta (ref=...) is forwarded here because the visible code does not
        # show where the merged references come from - confirm against Mem.align.
        self.sam = Mem.align(self.R1,
                             R2=self.R2,
                             index=self.bwa_index,
                             align_outdir=self.result_dir,
                             align_outname=self.outprefix + ".sam",
                             index_outdir=self.index_dir,
                             index_outname=self.outprefix + ".idx")

        print("\n##### FILTER ALIGNED READS AND ASSIGN A REFERENCE #####\n")
        # NOTE(review): splitting the output sam file according to each reference is
        # announced here but no statement performing it is visible - confirm.

        print("\n##### GENERATE OUTPUT FOR EACH REFERENCE #####\n")
        # Ask references to generate the output they were configured to.
        # path.join yields a valid prefix whether or not result_dir ends with a separator.
        Reference.mk_output_global(path.join(self.result_dir, self.outprefix))
        # NOTE(review): handling of the garbage read dictionary and creation of the
        # distribution table are announced but not visible here - confirm.

        print("\n##### DONE #####\n")
        print("Total execution time = {}s".format(round(time() - stime, 2)))
Exemplo n.º 2
def align(R1,
          R2=None,
          index=None,
          ref=None,
          aligner="bwa mem",
          align_opt="",
          align_threads=1,
          align_outdir="./bwa_align/",
          align_outname="out.sam",
          indexer="bwa index",
          index_opt="",
          index_outdir="./bwa_index/",
          index_outname="out"):
    """
    Main function of the package: validate an existing index or create a new one, then
    align single or paired fastq sequences against the index and return the resulting sam
    file. If a valid existing index is given, the index options and ref are not required.

    NOTE(review): the defaults of R2, index, ref, align_opt, align_threads, align_outdir,
    align_outname, index_opt, index_outdir and index_outname were restored from the names
    used in the body; their original default values are not visible - confirm.

    @param R1 Path to the file containing fastq sequences (can be gzipped)
    @param R2 Optional path to the file containing paired fastq sequences (can be gzipped)
    @param index Index files basename if available
    @param ref Path of the fasta file containing the reference sequence (can be gzipped).
    This parameter can also be a list of fasta files (gzipped or not), in this case all
    references will be merged into a single fasta reference
    @param aligner Path to the bwa mem executable. Not required if bwa is added to your path
    @param align_opt Bwa mem command line options as a string
    @param align_threads Thread count handed to the Aligner object
    @param align_outdir Directory where to store the sam file
    @param align_outname Name of the output sam file
    @param indexer Path to the bwa index executable. Not required if bwa is added to your path
    @param index_opt Bwa index command line options as a string
    @param index_outdir Directory where to store the index files
    @param index_outname Basename of the index file
    @return Path of the output sam file
    @exception Exception Raised when no usable index exists and no reference was provided
    """
    idx = None

    # Try to import an existing index
    if index:
        print("Existing index provided")
        try:
            idx = ExistingIndex(index)
        except Exception as E:
            # Deliberate best-effort: an invalid index is not fatal, a new one is built
            # below. Report the reason instead of swallowing it silently.
            print(E)
    else:
        print("No index provided")

    # If no index or if an error occurred during validation of the existing index,
    # create a new one
    if idx is None:
        # Verify the presence of the reference fasta file
        if not ref:
            raise Exception(
                "Invalid or no fasta file provided. Cannot create an index")

        print("Generating index...")
        index_path = path.join(index_outdir, index_outname)
        idx = NewIndex(ref, index_path, index_opt, indexer)

    # Create an Aligner object
    mem = Aligner(idx, align_opt, aligner, align_threads)

    # Align the reference index with R1 fastq (and R2)
    align_path = path.join(align_outdir, align_outname)
    return mem.align(R1, R2, align_path)
Exemplo n.º 3
Exemplo n.º 4
def align(query_list,
          subject_db=None,
          subject_fasta=None,
          aligner="blastn",
          align_opt="",
          num_threads=1,
          db_maker="makeblastdb",
          db_opt="",
          db_outdir="./blast_db/",
          db_outname="out"):
    """
    Main function of RefMasker that integrates database creation, blast and homology masking

    * Instantiate Blast database and blastn object
    * Perform iterative blasts of query sequences against the subject database and create a
    list of hits

    @param query_list List of paths indicating fasta files containing query sequences (can be
    gzipped). Fasta can contain multiple sequences.
    @param subject_db Basename of file from a blast database created by "makeblastdb" if available
    @param subject_fasta Reference fasta file. Required if no subject_db is given (can be gzipped)
    @param aligner Path to the blastn executable. Not required if blast+ is added to your path
    @param align_opt Blastn command line options as a string
    @param num_threads Thread count handed to the Aligner object
    @param db_maker Path to the makeblastdb executable. Not required if blast+ is added to your path
    @param db_opt makeblastdb command line options as a string
    @param db_outdir Directory where to store the database files
    @param db_outname Basename of the database files
    @return A list of BlastHit objects
    @exception Exception Raised when no usable database exists and subject_fasta is not a file
    """
    db = None

    # Try to import an existing database
    if subject_db:
        print("Existing database provided")
        try:
            db = ExistingDB(subject_db)
        except Exception as E:
            # Deliberate best-effort: an invalid database is reported and rebuilt below
            print(E)
    else:
        print("No Blast database was provided")

    # If no DB or if an error occurred during validation of the existing DB, create a new db
    if db is None:
        # Verify the presence of the reference fasta file
        if not subject_fasta or not path.isfile(subject_fasta):
            raise Exception("Invalid or no fasta file provided. Cannot create a database")

        print("Generate a database...")
        db_path = path.join(db_outdir, db_outname)

        # Create the new database
        db = NewDB(ref_path=subject_fasta, db_path=db_path, makeblastdb_opt=db_opt,
                   makeblastdb=db_maker)

    # Initialise a Blastn object
    blast = Aligner(db, align_opt, aligner, num_threads)

    # Gather the hits of every query of query_list against the subject in a single list
    hit_list = []
    for query in query_list:
        # NOTE(review): assumes Aligner.align(query) returns an iterable of hits for one
        # query file - confirm against the Aligner class.
        hit_list.extend(blast.align(query))

    return hit_list
Exemplo n.º 5
Exemplo n.º 6
Exemplo n.º 7
Exemplo n.º 8
Arquivo: Mem.py Projeto: a-slide/pyDNA
