Exemplo n.º 1
0
    def test_vt_split_and_leftaln(self):
        """
        The generated command must contain the ' -n ' flag when
        allow_ref_mismatches is True and must not contain it when
        allow_ref_mismatches is False.
        """
        fasta = "ref.fasta"

        # Each row pairs a flag value with the assertion expected to hold
        # for the command built with that value.
        expectations = (
            (True, self.assertIn),
            (False, self.assertNotIn),
        )
        for allow_mismatches, check in expectations:
            generated = vt_split_and_leftaln(fasta, allow_ref_mismatches=allow_mismatches)
            check(' -n ', generated)
Exemplo n.º 2
0
    def command(self):
        """
        Assemble the shell pipeline for paired tumor/normal VarDict calling.

        The returned string pipes ``vardict-java`` through ``testsomatic.R``
        and ``var2vcf_paired.pl``, then through a chain of filtering and
        normalisation stages, and finally writes a bgzip-compressed,
        tabix-indexed VCF to ``self.output``.

        When ``self.blacklist_bed`` is set, an ``intersectBed`` stage against
        that file is inserted just before compression; otherwise no such
        stage is added.

        :return: the complete command line as a single string
        """
        # Return values are discarded; presumably these calls only validate
        # that both input BAMs are set -- confirm against `required`.
        required("", self.input_tumor)
        required("", self.input_normal)

        # Excludes records whose STATUS does not match ".*Somatic", then
        # streams the rest through depth_freq_filter_input_stream with the
        # arguments 0 and "bwa", run by the current Python interpreter.
        freq_filter = (
            " bcftools filter -e 'STATUS !~ \".*Somatic\"' 2> /dev/null "
            "| %s -c 'from autoseq.util.bcbio import depth_freq_filter_input_stream; import sys; print depth_freq_filter_input_stream(sys.stdin, %s, \"%s\")' "
            % (sys.executable, 0, 'bwa'))

        # Rewrites the header text left behind by the filter expression and
        # then passes the whole VCF text through call_somatic.
        somatic_filter = (
            " sed 's/\\.*Somatic\\\"/Somatic/' "  # changes \".*Somatic\" to Somatic
            "| sed 's/REJECT,Description=\".*\">/REJECT,Description=\"Not Somatic via VarDict\">/' "
            "| %s -c 'from autoseq.util.bcbio import call_somatic; import sys; print call_somatic(sys.stdin.read())' "
            % sys.executable)

        # Leading pipe only: the following stage (" | bgzip ...") brings its
        # own pipe, so a trailing "|" here would create an empty pipeline
        # element ("| |") and a shell syntax error.
        # NOTE(review): "-a ." is kept from the original; bedtools documents
        # "stdin" / "-" for piped input, and whether "-v" / "-header" are
        # wanted for a blacklist is unclear -- confirm before relying on it.
        blacklist_filter = " | intersectBed -a . -b {} ".format(
            self.blacklist_bed)

        cmd = "vardict-java " + required("-G ", self.reference_sequence) + \
              optional("-f ", self.min_alt_frac) + \
              required("-N ", self.tumorid) + \
              optional("-r ", self.min_num_reads) + \
              " -b \"{}|{}\" ".format(self.input_tumor, self.input_normal) + \
              " -c 1 -S 2 -E 3 -g 4 -Q 10 " + required("", self.target_bed) + \
              " | testsomatic.R " + \
              " | var2vcf_paired.pl -P 0.9 -m 4.25 -M " + required("-f ", self.min_alt_frac) + \
              " -N \"{}|{}\" ".format(self.tumorid, self.normalid) + \
              " | " + freq_filter + " | " + somatic_filter + " | " + fix_ambiguous_cl() + " | " + remove_dup_cl() + \
              " | vcfstreamsort -w 1000 " + \
              " | " + vt_split_and_leftaln(self.reference_sequence) + \
              " | bcftools view --apply-filters .,PASS " + \
              " | vcfsorter.pl {} /dev/stdin ".format(self.reference_dict) + \
              conditional(self.blacklist_bed, blacklist_filter) + \
              " | bgzip > {output} && tabix -p vcf {output}".format(output=self.output)
        # conditional() takes the condition first and the text second, so the
        # blacklist stage is emitted only when a blacklist BED is configured.
        return cmd
Exemplo n.º 3
0
 def command(self):
     """
     Build the shell command that downloads ``self.remote`` with curl,
     gunzips it, pipes it through the vt_split_and_leftaln stage (with
     reference mismatches allowed), bgzips the result into ``self.output``
     and indexes that file with tabix.

     :return: the complete command line as a single string
     """
     # Return value is discarded; presumably this only validates that the
     # reference .fai index is set -- confirm against `required`.
     required("", self.input_reference_sequence_fai)

     # Fragments are listed in pipeline order and concatenated unchanged.
     fragments = [
         "curl -L ",
         required(" ", self.remote),
         "| gzip -d |",
         vt_split_and_leftaln(self.input_reference_sequence, allow_ref_mismatches=True),
         "| bgzip ",
         required(" > ", self.output),
         " && tabix -p vcf {output}".format(output=self.output),
     ]
     return "".join(fragments)
Exemplo n.º 4
0
    def command(self):
        """
        Build the three-step shell command for a freebayes-parallel run.

        Step 1 converts ``self.target_bed`` into a uniquely named regions
        file under ``self.scratch``; step 2 runs freebayes-parallel over
        those regions and post-processes the calls into a bgzip-compressed,
        tabix-indexed VCF at ``self.output``; step 3 removes the regions
        file. The steps are chained with ``&&``.

        :return: the complete command line as a single string
        """
        # A fresh UUID per call gives every invocation its own regions file.
        regions_path = "{scratch}/{uuid}.regions".format(scratch=self.scratch,
                                                         uuid=uuid.uuid4())

        make_regions = "cat {} | bed_to_regions.py > {}".format(
            self.target_bed, regions_path)

        # Optional stage: pipes the whole VCF text through call_somatic,
        # executed by the current Python interpreter.
        somatic_stage = " | {} -c 'from autoseq.util.bcbio import call_somatic; import sys; print call_somatic(sys.stdin.read())' ".format(
            sys.executable)

        # Fragments are listed in pipeline order and concatenated unchanged.
        freebayes_fragments = [
            "freebayes-parallel {} {} ".format(regions_path, self.threads),
            required("-f ", self.reference_sequence),
            " --use-mapping-quality ",
            optional("--min-alternate-fraction ", self.min_alt_frac),
            optional("--min-coverage ", self.min_coverage),
            conditional(self.use_harmonic_indel_quals, "--harmonic-indel-quality"),
            optional("", self.params),
            repeat(" ", self.input_bams),
            """| bcftools filter -i 'ALT="<*>" || QUAL > 5' """,
            "| filter_erroneus_alt.py -V /dev/stdin ",
            conditional(self.somatic_only, somatic_stage),
            " | ",
            vt_split_and_leftaln(self.reference_sequence),
            # 'vcfuniq' is needed because freebayes sometimes reports
            # duplicate variants that have to be collapsed.
            " | vcfuniq | bcftools view --apply-filters .,PASS ",
            " | bgzip > {output} && tabix -p vcf {output}".format(output=self.output),
        ]
        run_freebayes = "".join(freebayes_fragments)

        cleanup = "rm {}".format(regions_path)

        steps = [make_regions, run_freebayes, cleanup]
        return " && ".join(steps)