Exemplo n.º 1
0
    def clusterWell(self):
        """Cluster this well's dereplicated reads and gzip the output.

        Reads ``self.dereplicated_file`` and sets ``self.clustered_file``:

        * to ``''`` when the dereplicated file name is ``''`` or
          ``gzipIsEmpty`` reports that file as empty;
        * otherwise to the value returned by ``gzipCompress`` for
          ``<out_path>/<clustered_dir>/<ID>_clustered.fasta``, after
          ``clusterFasta`` has been called with that path as its output.

        The output directory is created when it does not exist yet.
        """

        # Nothing to cluster: record the missing output and stop here
        if self.dereplicated_file == '' or gzipIsEmpty(self.dereplicated_file):
            self.clustered_file = ''
            return

        # Define the clustered path
        clustered_path = os.path.join(self.out_path, self.clustered_dir)

        # Create the directory, if needed. exist_ok=True avoids failing when
        # something else creates the directory between a separate existence
        # check and the creation call.
        os.makedirs(clustered_path, exist_ok=True)

        # Define the (uncompressed) clustered file
        self.clustered_file = os.path.join(clustered_path,
                                           '%s_clustered.fasta' % self.ID)

        # Cluster the dereplicated file into the clustered file
        clusterFasta(self.on_plate, self.ID, self.dereplicated_file,
                     self.clustered_file)

        # Gzip the file and keep whatever filename gzipCompress returns
        # (presumably the path of the compressed file -- confirm in helper)
        self.clustered_file = gzipCompress(self.clustered_file,
                                           return_filename=True)
Exemplo n.º 2
0
    def truncateWell(self):
        """Truncate this well's merged reads and gzip the output.

        Reads ``self.merged_file`` and sets ``self.truncated_file``:

        * to ``''`` when the merged file name is ``''`` or ``gzipIsEmpty``
          reports that file as empty;
        * otherwise to the value returned by ``gzipCompress`` for
          ``<out_path>/<truncated_dir>/<ID>_stripped.fastq``, after
          ``truncateFastq`` has been called with that path as its output.

        The output directory is created when it does not exist yet.
        """

        # Nothing to truncate: record the missing output and stop here
        if self.merged_file == '' or gzipIsEmpty(self.merged_file):
            self.truncated_file = ''
            return

        # Define the truncated path
        truncated_path = os.path.join(self.out_path, self.truncated_dir)

        # Create the directory, if needed. exist_ok=True avoids failing when
        # something else creates the directory between a separate existence
        # check and the creation call.
        os.makedirs(truncated_path, exist_ok=True)

        # Define the (uncompressed) truncated file
        self.truncated_file = os.path.join(truncated_path,
                                           '%s_stripped.fastq' % self.ID)

        # Truncate the merged file into the truncated file
        truncateFastq(self.merged_file, self.truncated_file)

        # Gzip the file and keep whatever filename gzipCompress returns
        # (presumably the path of the compressed file -- confirm in helper)
        self.truncated_file = gzipCompress(self.truncated_file,
                                           return_filename=True)
Exemplo n.º 3
0
    def filterWell(self):
        """Filter this well's truncated reads and gzip the output.

        Reads ``self.truncated_file`` and sets ``self.filtered_file``:

        * to ``''`` when the truncated file name is ``''`` or
          ``gzipIsEmpty`` reports that file as empty;
        * otherwise to the value returned by ``gzipCompress`` for
          ``<out_path>/<filtered_dir>/<ID>_filtered.fasta``, after
          ``filterFastq`` has been called with that path as its output.

        The output directory is created when it does not exist yet.
        """

        # Nothing to filter: record the missing output and stop here
        if self.truncated_file == '' or gzipIsEmpty(self.truncated_file):
            self.filtered_file = ''
            return

        # Define the filtered path
        filtered_path = os.path.join(self.out_path, self.filtered_dir)

        # Create the directory, if needed. exist_ok=True avoids failing when
        # something else creates the directory between a separate existence
        # check and the creation call.
        os.makedirs(filtered_path, exist_ok=True)

        # Define the (uncompressed) filtered file
        self.filtered_file = os.path.join(filtered_path,
                                          '%s_filtered.fasta' % self.ID)

        # Filter the truncated file into the filtered file
        filterFastq(self.truncated_file, self.filtered_file)

        # Gzip the file and keep whatever filename gzipCompress returns
        # (presumably the path of the compressed file -- confirm in helper)
        self.filtered_file = gzipCompress(self.filtered_file,
                                          return_filename=True)
Exemplo n.º 4
0
    def mergeWell(self):
        """Merge this well's R1 and R2 reads and gzip the outputs.

        Reads ``self.well_R1_file`` and ``self.well_R2_file`` and sets
        ``self.merged_file``, ``self.unmerged_R1_file`` and
        ``self.unmerged_R2_file``:

        * all three to ``''`` when ``gzipIsEmpty`` reports either input
          file as empty;
        * otherwise to the values returned by ``gzipCompress`` for
          ``<ID>_merged.fastq``, ``<ID>_notmerged_R1.fastq`` and
          ``<ID>_notmerged_R2.fastq`` inside ``<out_path>/<merged_dir>/``,
          after ``mergePairs`` has been called with those output paths.

        The output directory is created when it does not exist yet.
        """

        # Nothing to merge if either read file is empty
        if gzipIsEmpty(self.well_R1_file) or gzipIsEmpty(self.well_R2_file):
            self.merged_file = ''
            self.unmerged_R1_file = ''
            self.unmerged_R2_file = ''
            return

        # Joining with a final '' leaves a trailing directory separator, so
        # the file names below can be appended by plain string formatting
        merged_path = os.path.join(self.out_path, self.merged_dir, '')

        # Create the directory, if needed. exist_ok=True avoids failing when
        # something else creates the directory between a separate existence
        # check and the creation call.
        os.makedirs(merged_path, exist_ok=True)

        # Define the (uncompressed) merged and unmerged files
        self.merged_file = '%s%s_merged.fastq' % (merged_path, self.ID)
        self.unmerged_R1_file = '%s%s_notmerged_R1.fastq' % (merged_path,
                                                             self.ID)
        self.unmerged_R2_file = '%s%s_notmerged_R2.fastq' % (merged_path,
                                                             self.ID)

        # Merge the well R1 and R2 files
        mergePairs(self.ID, self.well_R1_file, self.well_R2_file,
                   self.merged_file, self.unmerged_R1_file,
                   self.unmerged_R2_file)

        # Gzip the files and keep whatever filenames gzipCompress returns
        # (presumably the paths of the compressed files -- confirm in helper)
        self.merged_file = gzipCompress(self.merged_file,
                                        return_filename=True)
        self.unmerged_R1_file = gzipCompress(self.unmerged_R1_file,
                                             return_filename=True)
        self.unmerged_R2_file = gzipCompress(self.unmerged_R2_file,
                                             return_filename=True)
Exemplo n.º 5
0
    def mostAbundantWell(self):
        """Write this well's most abundant clustered reads to a gzipped FASTA.

        Reads ``self.clustered_file`` and sets ``self.common_file``:

        * to ``''`` when the clustered file name is ``''`` or
          ``gzipIsEmpty`` reports that file as empty;
        * otherwise to ``<out_path>/<common_dir>/<ID>_common.fasta.gz``.

        In the second case the clustered file is scanned twice. The first
        pass reads every header line (starting with ``>``), parsing the
        text after the first ``=`` as the read abundance and the text
        after the last ``_`` of the part before the first ``;`` as the
        read rank; it remembers the highest abundance and the rank of the
        first read that reached it. If that rank is not 1,
        ``self.sortWell()`` is called. The second pass writes every record
        whose abundance is at least half of the highest abundance to the
        common file.

        The output directory is created when it does not exist yet.

        Raises:
            IndexError, ValueError: if a header does not match the layout
                described above.
        """

        # Nothing to select from: record the missing output and stop here
        if self.clustered_file == '' or gzipIsEmpty(self.clustered_file):
            self.common_file = ''
            return

        # Define the common path
        common_path = os.path.join(self.out_path, self.common_dir)

        # Create the directory, if needed. exist_ok=True avoids failing when
        # something else creates the directory between a separate existence
        # check and the creation call.
        os.makedirs(common_path, exist_ok=True)

        # Define the common file
        self.common_file = os.path.join(common_path,
                                        '%s_common.fasta.gz' % self.ID)

        # Highest abundance seen so far, and the rank of the read holding it
        most_abundant_count = 0
        most_abundant_rank = 0

        # The output is opened before the scan (as before), but through a
        # context manager so the handle is closed even if parsing or
        # sortWell() raises.
        with gzip.open(self.common_file, 'wt') as common_handle:

            # First pass: find the highest abundance and its rank
            with gzip.open(self.clustered_file, "rt") as clustered_handle:

                for clustered_line in clustered_handle:

                    # Only header lines carry abundance and rank
                    if not clustered_line.startswith('>'):
                        continue

                    header = clustered_line.strip()

                    # Get the abundance
                    read_abundance = int(header.split('=')[1])

                    # Strictly greater: on ties the earliest read is kept
                    if read_abundance > most_abundant_count:
                        most_abundant_count = read_abundance
                        most_abundant_rank = int(
                            header.split(';')[0].rsplit('_', 1)[1])

            # Check if the read rank is not correctly ordered
            if most_abundant_rank != 1:

                # Sort the well to correct the order
                self.sortWell()

            # Second pass: copy the sufficiently abundant records
            with gzip.open(self.clustered_file, "rt") as clustered_handle:

                for record in SeqIO.parse(clustered_handle, "fasta"):

                    # Assign the abundance
                    read_abundance = int(record.id.split('=')[1])

                    # Keep reads with at least half the top abundance
                    if read_abundance >= (0.5 * most_abundant_count):
                        common_handle.write(record.format("fasta"))