Exemplo n.º 1
0
 def _sam2bam2sorted_bam(self):
     """Convert the mapped reads file into a sorted BAM file in one pipeline.

     Leftovers of a previous run are removed first: every file whose name
     starts with the sorted BAM file name (shell glob) and the conversion
     log. The mapped reads are then streamed through ``samtools view``
     directly into ``samtools sort``, so no intermediate unsorted BAM file
     is written. Whatever the shell pipeline prints is dumped into the
     conversion log file.
     """
     sorted_bam = self._pk.samtools_sorted_file_name
     log_file = self._pk.samtools_converted_log_file_name
     subprocess.getoutput("rm -f {}*".format(sorted_bam))
     Utilities.batch_remove(log_file)
     # SamTools details: http://www.htslib.org/doc/samtools.html
     # Avoiding self._pk.samtools_converted_file_name
     # NOTE: '-o' takes the next token as the output file name, so the target
     # path must directly follow it; the '-' (read from stdin) goes last.
     # The previous order ('-o -@ 1 <file>') made samtools treat '-@' as the
     # output name and reject the remaining arguments.
     command = ("samtools view -bu -@ 1 {a} | "
                "samtools sort -@ 1 -o {b} -").format(a=self._pk.mapped_reads_file_name,
                                                      b=sorted_bam)
     pipeline_output = subprocess.getoutput(command)
     Utilities.dump_string(string=pipeline_output, file=log_file)
     logging.info("Sorted SAM file: '{}'".format(sorted_bam))
Exemplo n.º 2
0
    def compile(input_file: str,
                output_dir: str,
                preserve_headers: bool = False,
                chop: bool = False,
                chunk_length: int = int(3.6 * 10**9)):
        """Index the nFASTA reference(s) and write a JSON reference data linker.

        The input file is handed to ``FASTAArray.prepare_nfasta_for_indexing``
        together with the remaining arguments. For every returned entry that
        carries a non-empty ``"reference_nfasta"`` value, a ``RefDataLine`` is
        built and indexed. The collected entries are serialized to
        ``<output_dir><input file name>_refdata.json``.

        :param input_file: source nFASTA file
        :param output_dir: directory for all produced files; created if missing
        :param preserve_headers: forwarded to ``prepare_nfasta_for_indexing``
        :param chop: forwarded to ``prepare_nfasta_for_indexing``
        :param chunk_length: forwarded to ``prepare_nfasta_for_indexing``
        :return: name of the written JSON linker file
        """
        import json
        from modules.FASTAArray import FASTAArray
        from modules.RefDataLine import RefDataLine

        output_dir = Utilities.ends_with_slash(output_dir)
        os.makedirs(output_dir, exist_ok=True)
        prepared = FASTAArray.prepare_nfasta_for_indexing(input_file=input_file,
                                                          output_dir=output_dir,
                                                          preserve_headers=preserve_headers,
                                                          chop=chop,
                                                          chunk_length=chunk_length)
        linker = dict()
        for sequence_id in prepared:
            annotation = prepared[sequence_id]
            nfasta = annotation.get("reference_nfasta")
            if nfasta:
                # Key order matters for the dumped JSON: alias first, then the
                # filled values, then the annotation (which may override both).
                entry = dict(alias=Utilities.filename_only(nfasta))
                entry.update(RefDataLine.fill_dict(nfasta))
                entry.update(annotation)
                print("Processing nFASTA: '{}'".format(nfasta))
                RefDataLine(entry).index()
                linker[sequence_id] = entry
        output_file = "{a}{b}_refdata.json".format(a=Utilities.ends_with_slash(output_dir),
                                                   b=Utilities.filename_only(input_file))
        serialized = json.dumps(linker, sort_keys=False, indent=4) + "\n"
        Utilities.dump_string(string=serialized, file=output_file)
        print("Created reference data linker: '{}'".format(output_file))
        return output_file
Exemplo n.º 3
0
 def __samtools_faidx(self):
     """Build the SAMTools FAI index of the nFASTA file.

     Runs ``samtools faidx``, dumps its output into
     ``<reference mask>_samtools_faidx.log``, renames the ``<nfasta>.fai``
     file to ``self.samtools_index_file`` and then calls the follow-up
     ``___fai2genome`` step.
     """
     command = "samtools faidx {}".format(self._nfasta)
     Utilities.dump_string(
         string=subprocess.getoutput(command),
         file="{}_samtools_faidx.log".format(self._reference_mask))
     # The index is expected next to the nFASTA file; move it to its final name.
     generated_index = "{}.fai".format(self._nfasta)
     os.rename(generated_index, self.samtools_index_file)
     print("Created SAMTools FAI file: '{}'".format(self.samtools_index_file))
     self.___fai2genome()
Exemplo n.º 4
0
 def __bowtie2_build(self):
     """Build the bowtie2 index of the nFASTA file.

     Runs ``bowtie2-build`` with ``self.bowtie2_index_mask`` as its second
     argument and dumps the tool output into
     ``<reference mask>_bowtie2-build.log``.
     """
     command = "bowtie2-build {a} {b}".format(a=self._nfasta, b=self.bowtie2_index_mask)
     Utilities.dump_string(
         string=subprocess.getoutput(command),
         file="{}_bowtie2-build.log".format(self._reference_mask))
     print("Created bowtie2 index with mask: '{}'".format(self.bowtie2_index_mask))
Exemplo n.º 5
0
 def _index_bam(self):
     """Index the sorted BAM file with ``samtools index``.

     The previous index file and index log are removed first; the tool
     output is then dumped into the index log file and the index file name
     is reported through ``logging``.
     """
     pk = self._pk
     Utilities.batch_remove(pk.samtools_index_file_name, pk.samtools_index_log_file_name)
     command = "samtools index {}".format(pk.samtools_sorted_file_name)
     Utilities.dump_string(string=subprocess.getoutput(command),
                           file=pk.samtools_index_log_file_name)
     logging.info("Indexed BAM file: '{}'".format(pk.samtools_index_file_name))