Пример #1
0
    def analyse_wgs_prepare(self, input, output):
        '''
            Create the working directory and copy in the scripts needed to
            run the wgs pipeline for a tumour sample.

            input: path to the sample's .mapped.bam file.
            output: path of the marker file touched when the stage succeeds.

            When util.find_normal returns None the sample is treated as a
            normal: an explanatory note is written to output and no stage
            is run.
        '''
        # full path without .mapped.bam; the dots are escaped so that they
        # only match literal dots
        prefix = re.sub(r'\.mapped\.bam$', '', input)
        tumour_id = prefix.split('/')[-1]  # e.g. CMHS1

        # close the metadata file as soon as the lookup has finished
        with open("{}/cfg/sample-metadata.csv".format(config.ROOT),
                  'r') as metadata_fh:
            normal_id = util.find_normal(tumour_id, metadata_fh)

        if normal_id is None:  # nothing to do
            safe_make_dir(os.path.dirname(output))
            with open(output, 'w') as output_fh:
                output_fh.write(
                    'Normal sample does not require analysis. See the relevant tumour file.\n'
                )
            return

        tmp_id = 'wgs-{}'.format(tumour_id)
        tmp_dir = '{tmp}/{tmp_id}'.format(tmp=config.TMP, tmp_id=tmp_id)
        safe_make_dir(tmp_dir)
        safe_make_dir(os.path.dirname(output))

        # copy the analysis script and the wrapper into the working
        # directory, then touch the marker file
        command = 'cp {root}/src/util/analysisWGS.serial.sh {tmp_dir}/analysisWGS.sh && cp {root}/src/util/ds-wrapper-wgs-1.0.8.pl {tmp_dir}/ds-wrapper.pl && touch {output}'.format(
            root=config.ROOT, output=output, tmp_dir=tmp_dir)
        run_stage(self.state, 'analyse_wgs_prepare', command)
Пример #2
0
    def contest(self, input, output):
        '''
            Generate the contest script for a sample from its template and
            run it.

            input: path to the sample's .mapped.bam file.
            output: path of the marker file touched when the stage succeeds.

            When util.find_normal returns None the sample is itself a normal
            and is used as its own normal. The normal's uuid is read from
            the second line, ninth tab-separated column of its .validation
            file.
        '''
        # full path without .mapped.bam; the dots are escaped so that they
        # only match literal dots
        prefix = re.sub(r'\.mapped\.bam$', '', input)
        tumour_id = prefix.split('/')[-1]  # e.g. CMHS1

        with open("{root}/cfg/sample-metadata.csv".format(root=config.ROOT),
                  'r') as metadata_fh:
            normal_id = util.find_normal(tumour_id, metadata_fh)

        # tumour_id is actually normal: both cases share the same steps
        # once the sample stands in as its own normal
        if normal_id is None:
            normal_id = tumour_id

        with open(
                "{root}/out/{sample}.validation".format(root=config.ROOT,
                                                        sample=normal_id),
                'r') as validation_fh:
            validation_data = validation_fh.readlines()
        normal_uuid = validation_data[1].split('\t')[8]

        # applied in this order to every template line; plain string
        # replacement inserts the values literally (no regex escapes)
        substitutions = (
            ('TUMOUR', tumour_id),
            ('NORMAL', normal_id),
            ('UUID', normal_uuid),
            ('ROOT', config.ROOT),
        )
        script_path = '{tmp_dir}/{tumour_id}.contest.sh'.format(
            tmp_dir=config.TMP, tumour_id=tumour_id)
        template_path = '{root}/src/util/contest.sh.template'.format(
            root=config.ROOT)
        with open(script_path, 'w') as analyse_fh, \
                open(template_path, 'r') as template_fh:
            for line in template_fh:
                for placeholder, value in substitutions:
                    line = line.replace(placeholder, value)
                analyse_fh.write(line)

        command = 'bash {tmp_dir}/{tumour_id}.contest.sh 2>{prefix}.contest.log.err 1>{prefix}.contest.log.out && touch "{output}"'.format(
            tmp_dir=config.TMP,
            tumour_id=tumour_id,
            output=output,
            prefix=prefix)

        run_stage(self.state, 'contest', command)
Пример #3
0
    def _analyse_wgs_with_command(self, input, output, subcommand, cpu=4):
        '''
            Take mapped bams and generate variant calls by running one
            subcommand of the sanger pipeline cgpwgs inside its singularity
            image.

            input: sequence whose first element is the path to the sample's
                .mapped.bam file.
            output: path of the marker file touched when the stage succeeds.
            subcommand: value substituted for COMMAND in the analysis
                template; also used in the script, log and stage names.
            cpu: value substituted for CPULIMIT in the analysis template.

            When util.find_normal returns None the sample is treated as a
            normal: an explanatory note is written to output and no stage
            is run.
        '''
        input = input[0]
        # full path without .mapped.bam; the dots are escaped so that they
        # only match literal dots
        prefix = re.sub(r'\.mapped\.bam$', '', input)
        tumour_id = prefix.split('/')[-1]  # e.g. CMHS1

        with open("{}/cfg/sample-metadata.csv".format(config.ROOT),
                  'r') as metadata_fh:
            normal_id = util.find_normal(tumour_id, metadata_fh)

        if normal_id is None:  # nothing to do
            safe_make_dir(os.path.dirname(output))
            with open(output, 'w') as output_fh:
                output_fh.write(
                    'Normal sample does not require analysis. See the relevant tumour file.\n'
                )
            return

        tmp_id = 'wgs-{}-{}'.format(config.WGS_VERSION, tumour_id)
        tmp_dir = '{tmp}/{tmp_id}'.format(tmp=config.TMP, tmp_id=tmp_id)
        # this directory is bound as the container's home in the command below
        safe_make_dir('{}/home'.format(tmp_dir))

        # make subcommand analysis script; substitutions are applied in this
        # order, as plain string replacements so values are inserted
        # literally (no regex escapes)
        substitutions = (
            ('TMP_ID', tmp_id),
            ('TUMOUR', tumour_id),
            ('NORMAL', normal_id),
            ('COMMAND', subcommand),
            ('WGS_VERSION', config.WGS_VERSION),
            ('CPULIMIT', str(cpu)),
        )
        script_path = '{tmp_dir}/analyse-{subcommand}.sh'.format(
            tmp_dir=tmp_dir, subcommand=subcommand)
        # e.g. analyse-1.1.2.sh.template
        template_path = '{root}/src/util/analyse-{wgs_version}.sh.template'.format(
            wgs_version=config.WGS_VERSION, root=config.ROOT)
        with open(script_path, 'w') as analyse_fh, \
                open(template_path, 'r') as template_fh:
            for line in template_fh:
                for placeholder, value in substitutions:
                    line = line.replace(placeholder, value)
                analyse_fh.write(line)

        command = 'singularity exec -i --bind {in_dir}:/mnt/in,{out}:/mnt/out,{reference}:/mnt/reference,{tmp}:/mnt/tmp --workdir {tmp_dir} --home {tmp_dir}/home:/home/z --contain {root}/img/cgpwgs-{wgs_version}.img bash /mnt/tmp/{tmp_id}/analyse-{subcommand}.sh 1>{prefix}.wgs.{subcommand}.{wgs_version}.log.out 2>{prefix}.wgs.{subcommand}.{wgs_version}.log.err && touch {output}'.format(
            root=config.ROOT,
            in_dir=config.IN,
            out=config.OUT,
            reference=config.REFERENCE,
            tmp=config.TMP,
            tmp_dir=tmp_dir,
            tmp_id=tmp_id,
            prefix=prefix,
            output=output,
            subcommand=subcommand,
            wgs_version=config.WGS_VERSION)
        run_stage(self.state, 'analyse_wgs_{}'.format(subcommand), command)
Пример #4
0
    def callable_bases(self, input, output):
        '''
            Generate the callable bases script for a tumour sample from its
            template and run it.

            input: path to the sample's .mapped.bam file.
            output: path of the marker file touched when the stage succeeds.

            When util.find_normal returns None the sample is treated as a
            normal: an explanatory note is written to output and no stage
            is run.
        '''

        # values substituted for MIN_TUM and MIN_NORM in the template
        MINIMUM_COVERAGE_TUMOR = '17'
        MINIMUM_COVERAGE_NORMAL = '10'

        # full path without .mapped.bam; the dots are escaped so that they
        # only match literal dots
        prefix = re.sub(r'\.mapped\.bam$', '', input)
        tumour_id = prefix.split('/')[-1]  # e.g. CMHS1

        with open("{root}/cfg/sample-metadata.csv".format(root=config.ROOT),
                  'r') as metadata_fh:
            normal_id = util.find_normal(tumour_id, metadata_fh)

        # nothing to do for normal sample
        if normal_id is None:
            safe_make_dir(os.path.dirname(output))
            with open(output, 'w') as output_fh:
                output_fh.write(
                    'Normal sample does not require analysis. See the relevant tumour file.\n'
                )
            return

        # it's a tumour; substitutions are applied in this order, as plain
        # string replacements so values are inserted literally
        substitutions = (
            ('TUMOUR', tumour_id),
            ('NORMAL', normal_id),
            ('ROOT', config.ROOT),
            ('TMP_DIR', config.TMP),
            ('MIN_TUM', MINIMUM_COVERAGE_TUMOR),
            ('MIN_NORM', MINIMUM_COVERAGE_NORMAL),
        )
        script_path = '{tmp_dir}/{tumour_id}.callable_bases.sh'.format(
            tmp_dir=config.TMP, tumour_id=tumour_id)
        template_path = '{root}/src/util/callable_bases.sh.template'.format(
            root=config.ROOT)
        with open(script_path, 'w') as analyse_fh, \
                open(template_path, 'r') as template_fh:
            for line in template_fh:
                for placeholder, value in substitutions:
                    line = line.replace(placeholder, value)
                analyse_fh.write(line)

        command = 'bash {tmp_dir}/{tumour_id}.callable_bases.sh 2>{prefix}.callable_bases.log.err 1>{prefix}.callable_bases.log.out && touch "{output}"'.format(
            tmp_dir=config.TMP,
            tumour_id=tumour_id,
            output=output,
            prefix=prefix)

        run_stage(self.state, 'callable_bases', command)
Пример #5
0
    def hmmcopy(self, input, output):
        '''
            Generate the hmmcopy script for a sample from the matching
            template and run it.

            input: path to the sample's .mapped.bam file.
            output: path of the marker file touched when the stage succeeds.

            When util.find_normal returns None the sample is itself a normal
            and the hmmcopy-normal template is used, with the sample's id
            substituted for NORMAL. Otherwise the tumour template is used.
            The script is written to the "<prefix>.hmmcopy" directory.
        '''
        # full path without .mapped.bam; the dots are escaped so that they
        # only match literal dots
        prefix = re.sub(r'\.mapped\.bam$', '', input)
        tumour_id = prefix.split('/')[-1]  # e.g. CMHS1

        with open("{}/cfg/sample-metadata.csv".format(config.ROOT),
                  'r') as metadata_fh:
            normal_id = util.find_normal(tumour_id, metadata_fh)

        # both sample kinds use the same target directory and command; only
        # the template and its substitutions differ
        target_dir = '{}.hmmcopy'.format(prefix)
        safe_make_dir(target_dir)

        if normal_id is None:
            # tumour_id is actually a normal
            template_name = 'hmmcopy-normal.sh.template'
            substitutions = (
                ('NORMAL', tumour_id),
                ('ROOT', config.ROOT),
                ('TARGET_DIR', target_dir),
            )
        else:
            # it's a tumour
            template_name = 'hmmcopy.sh.template'
            substitutions = (
                ('TUMOUR', tumour_id),
                ('NORMAL', normal_id),
                ('ROOT', config.ROOT),
                ('TARGET_DIR', target_dir),
            )

        script_path = '{target_dir}/hmmcopy.sh'.format(target_dir=target_dir)
        template_path = '{root}/src/util/{template_name}'.format(
            root=config.ROOT, template_name=template_name)
        # substitutions are applied in order, as plain string replacements
        # so values are inserted literally (no regex escapes)
        with open(script_path, 'w') as analyse_fh, \
                open(template_path, 'r') as template_fh:
            for line in template_fh:
                for placeholder, value in substitutions:
                    line = line.replace(placeholder, value)
                analyse_fh.write(line)

        command = 'bash {target_dir}/hmmcopy.sh 2>{prefix}.hmmcopy.log.err 1>{prefix}.hmmcopy.log.out && touch "{output}"'.format(
            target_dir=target_dir, output=output, prefix=prefix)

        run_stage(self.state, 'hmmcopy', command)
Пример #6
0
    def delly(self, input, output, cpu=6):
        '''
            Run the delly singularity container for a tumour sample.

            input: path to the sample's .mapped.bam file.
            output: path of the marker file touched when the stage succeeds.
            cpu: value substituted for CORES in the delly template.

            When util.find_normal returns None the sample is treated as a
            normal: an explanatory note is written to output and no stage
            is run. On success the command moves the container's workdir to
            "<prefix>.delly.results" and removes the temporary directory.
        '''
        # full path without .mapped.bam; the dots are escaped so that they
        # only match literal dots
        prefix = re.sub(r'\.mapped\.bam$', '', input)
        tumour_id = prefix.split('/')[-1]  # e.g. CMHS1

        with open("{}/cfg/sample-metadata.csv".format(config.ROOT),
                  'r') as metadata_fh:
            normal_id = util.find_normal(tumour_id, metadata_fh)

        # nothing to do for normal sample
        if normal_id is None:
            safe_make_dir(os.path.dirname(output))
            with open(output, 'w') as output_fh:
                output_fh.write(
                    'Normal sample does not require analysis. See the relevant tumour file.\n'
                )
            return

        # it's a tumour; a random uuid gives every run its own directory
        tmp_id = 'delly-{}-{}'.format(tumour_id, str(uuid.uuid4()))
        tmp_dir = '{tmp}/{tmp_id}'.format(tmp=config.TMP, tmp_id=tmp_id)
        safe_make_dir(tmp_dir)

        # substitutions are applied in this order, as plain string
        # replacements so values are inserted literally (no regex escapes)
        substitutions = (
            ('TUMOUR', tumour_id),
            ('NORMAL', normal_id),
            ('CORES', str(cpu)),
        )
        script_path = '{tmp_dir}/delly.sh'.format(tmp_dir=tmp_dir)
        template_path = '{root}/src/util/delly.sh.template'.format(
            root=config.ROOT)
        with open(script_path, 'w') as analyse_fh, \
                open(template_path, 'r') as template_fh:
            for line in template_fh:
                for placeholder, value in substitutions:
                    line = line.replace(placeholder, value)
                analyse_fh.write(line)

        command = 'singularity exec -i --bind {in_dir}:/mnt/in,{out}:/mnt/out,{reference}:/mnt/reference,{tmp_dir}:/mnt/tmp --workdir {tmp_dir} --contain {root}/img/delly-2.0.0.img bash /mnt/tmp/delly.sh 1>{prefix}.delly.log.out 2>{prefix}.delly.log.err && mv {tmp_dir}/workdir {prefix}.delly.results && touch "{output}" && rm -r "{tmp_dir}"'.format(
            root=config.ROOT,
            in_dir=config.IN,
            out=config.OUT,
            reference=config.REFERENCE_DELLY,
            tmp_dir=tmp_dir,
            prefix=prefix,
            output=output)

        run_stage(self.state, 'delly', command)
Пример #7
0
    def somatic_sniper(self, input, output):
        '''
            Generate the somatic sniper script for a tumour sample from its
            template and run it.

            input: path to the sample's .mapped.bam file.
            output: path of the marker file touched when the stage succeeds.

            When util.find_normal returns None the sample is treated as a
            normal: an explanatory note is written to output and no stage
            is run.
        '''

        # full path without .mapped.bam; the dots are escaped so that they
        # only match literal dots
        prefix = re.sub(r'\.mapped\.bam$', '', input)
        tumour_id = prefix.split('/')[-1]  # e.g. CMHS1

        with open("{root}/cfg/sample-metadata.csv".format(root=config.ROOT),
                  'r') as metadata_fh:
            normal_id = util.find_normal(tumour_id, metadata_fh)

        # nothing to do for normal sample
        if normal_id is None:
            safe_make_dir(os.path.dirname(output))
            with open(output, 'w') as output_fh:
                output_fh.write(
                    'Normal sample does not require analysis. See the relevant tumour file.\n'
                )
            return

        # it's a tumour; substitutions are applied in this order, as plain
        # string replacements so values are inserted literally
        substitutions = (
            ('TUMOUR_ID', tumour_id),
            ('NORMAL_ID', normal_id),
            ('ROOT_PATH', config.ROOT),
        )
        script_path = '{tmp_dir}/{tumour_id}.somatic_sniper.sh'.format(
            tmp_dir=config.TMP, tumour_id=tumour_id)
        template_path = '{root}/src/util/somatic_sniper.sh.template'.format(
            root=config.ROOT)
        with open(script_path, 'w') as analyse_fh, \
                open(template_path, 'r') as template_fh:
            for line in template_fh:
                for placeholder, value in substitutions:
                    line = line.replace(placeholder, value)
                analyse_fh.write(line)

        command = 'bash {tmp_dir}/{tumour_id}.somatic_sniper.sh 2>{prefix}.somatic_sniper.log.err 1>{prefix}.somatic_sniper.log.out && touch "{output}"'.format(
            tmp_dir=config.TMP,
            tumour_id=tumour_id,
            output=output,
            prefix=prefix)

        run_stage(self.state, 'somatic_sniper', command)
Пример #8
0
    def gridss(self, input, output):
        '''
            Generate the gridss script for a tumour sample from its template
            and run it.

            input: path to the sample's .mapped.bam file.
            output: path of the marker file touched when the stage succeeds.

            When util.find_normal returns None the sample is treated as a
            normal: an explanatory note is written to output and no stage
            is run. On success the command removes the temporary directory.
        '''
        # full path without .mapped.bam; the dots are escaped so that they
        # only match literal dots
        prefix = re.sub(r'\.mapped\.bam$', '', input)
        tumour_id = prefix.split('/')[-1]  # e.g. CMHS1

        with open("{}/cfg/sample-metadata.csv".format(config.ROOT),
                  'r') as metadata_fh:
            normal_id = util.find_normal(tumour_id, metadata_fh)

        # nothing to do for normal sample
        if normal_id is None:
            safe_make_dir(os.path.dirname(output))
            with open(output, 'w') as output_fh:
                output_fh.write(
                    'Normal sample does not require analysis. See the relevant tumour file.\n'
                )
            return

        # it's a tumour; a random uuid gives every run its own directory
        tmp_id = 'gridss-{}-{}'.format(tumour_id, str(uuid.uuid4()))
        tmp_dir = '{tmp}/{tmp_id}'.format(tmp=config.TMP, tmp_id=tmp_id)
        safe_make_dir(tmp_dir)

        # substitutions are applied in this order, as plain string
        # replacements so values are inserted literally (no regex escapes)
        substitutions = (
            ('TUMOUR', tumour_id),
            ('NORMAL', normal_id),
            ('ROOT', config.ROOT),
            ('ACCOUNT', config.ACCOUNT),
        )
        script_path = '{tmp_dir}/gridss.sh'.format(tmp_dir=tmp_dir)
        template_path = '{root}/src/util/gridss.sh.template'.format(
            root=config.ROOT)
        with open(script_path, 'w') as analyse_fh, \
                open(template_path, 'r') as template_fh:
            for line in template_fh:
                for placeholder, value in substitutions:
                    line = line.replace(placeholder, value)
                analyse_fh.write(line)

        command = 'bash {tmp_dir}/gridss.sh 2>{prefix}.gridss.log.err 1>{prefix}.gridss.log.out && touch "{output}" && rm -r {tmp_dir}'.format(
            tmp_dir=tmp_dir, output=output, prefix=prefix)

        run_stage(self.state, 'gridss', command)
Пример #9
0
    def muse(self, input, output):
        '''
            Generate the muse script for a tumour sample from its template
            and run it.

            input: path to the sample's .mapped.bam file.
            output: path of the marker file touched when the stage succeeds.

            The bam header is read with "samtools view -H"; each @SQ
            sequence is split into chunks of at most `interval` bases and
            one "$MUSE call" command is generated per chunk. The commands
            replace CALL_VARIANTS in the template.

            When util.find_normal returns None the sample is treated as a
            normal: an explanatory note is written to output and no stage
            is run. On success the command removes the temporary directory.
        '''
        interval = 50000000  # chunk size to break chromosomes into for muse

        # full path without .mapped.bam; the dots are escaped so that they
        # only match literal dots
        prefix = re.sub(r'\.mapped\.bam$', '', input)
        tumour_id = prefix.split('/')[-1]  # e.g. CMHS1

        with open("{}/cfg/sample-metadata.csv".format(config.ROOT),
                  'r') as metadata_fh:
            normal_id = util.find_normal(tumour_id, metadata_fh)

        # nothing to do for normal sample
        if normal_id is None:
            safe_make_dir(os.path.dirname(output))
            with open(output, 'w') as output_fh:
                output_fh.write(
                    'Normal sample does not require analysis. See the relevant tumour file.\n'
                )
            return

        # it's a tumour; a random uuid gives every run its own directory
        tmp_id = 'muse-{}-{}'.format(tumour_id, str(uuid.uuid4()))
        tmp_dir = '{tmp}/{tmp_id}'.format(tmp=config.TMP, tmp_id=tmp_id)
        safe_make_dir(tmp_dir)

        # build the per-region muse call commands
        muse_commands = []
        # universal_newlines=True makes stdout text rather than bytes, so
        # the string comparisons below work on both Python 2 and Python 3
        proc = subprocess.Popen(['samtools', 'view', '-H', input],
                                stdout=subprocess.PIPE,
                                universal_newlines=True)
        # communicate() drains the pipe and reaps the child process.
        # NOTE(review): a non-zero samtools exit status is still not treated
        # as an error here, matching the previous behaviour.
        header, _ = proc.communicate()
        for line in header.splitlines():
            if not line.startswith('@SQ\t'):
                continue
            # look tags up by name, splitting on the first ':' only so that
            # sequence names containing ':' are kept whole
            tags = dict(
                field.split(':', 1)
                for field in line.strip().split('\t')[1:]
                if ':' in field)
            chromosome = tags['SN']
            size = int(tags['LN'])
            # now write regions as zero based
            current = 0
            while current < size:
                final = min(size, current + interval)
                muse_commands.append(
                    '$MUSE call -O {tmp_dir}/tmp{chromosome}_{current}_{final} -f $REFERENCE -r "{chromosome}:{current}-{final}" $TMR_ABS $NRML_ABS'
                    .format(tmp_dir=tmp_dir,
                            chromosome=chromosome,
                            current=current,
                            final=final))
                current = final

        # substitutions are applied in this order, as plain string
        # replacements so values are inserted literally; the command list is
        # joined once rather than once per template line
        substitutions = (
            ('TUMOUR', tumour_id),
            ('NORMAL', normal_id),
            ('TMP_DIR', tmp_dir),
            ('ROOT', config.ROOT),
            ('CALL_VARIANTS', '\n'.join(muse_commands)),
        )
        script_path = '{tmp_dir}/muse.sh'.format(tmp_dir=tmp_dir)
        template_path = '{root}/src/util/muse.sh.template'.format(
            root=config.ROOT)
        with open(script_path, 'w') as analyse_fh, \
                open(template_path, 'r') as template_fh:
            for line in template_fh:
                for placeholder, value in substitutions:
                    line = line.replace(placeholder, value)
                analyse_fh.write(line)

        command = 'bash {tmp_dir}/muse.sh 2>{prefix}.muse.log.err 1>{prefix}.muse.log.out && touch "{output}" && rm -r {tmp_dir}'.format(
            tmp_dir=tmp_dir, output=output, prefix=prefix)

        run_stage(self.state, 'muse', command)