示例#1
0
def setup_merged_samples(flist, sample_group_fn=_group_samples, **kw):
    """Prepare merge analyses for samples that have more than one sample run.

    For each such sample an updated post_process file and a merged
    bcbb-config file are set up in MERGED_SAMPLE_OUTPUT_DIR, located two
    directory levels above the config file of the sample's first run.

    :param flist: list of file names, by default *-bcbb-config.yaml files
    :param sample_group_fn: callable that takes flist and groups the files into samples and sample runs
    :param kw: options read here: ``post_process`` (post_process file to use
        instead of the one next to the first run config), ``dry_run``
        (default True) and ``new_config`` (default False)

    :returns: list with the merged bcbb-config file of every sample that was set up
    """
    is_dry = kw.get('dry_run', True)
    dump_opts = dict(default_flow_style=False, allow_unicode=True, width=1000)
    merged_configs = []
    for sample, runs in sample_group_fn(flist).iteritems():
        # Samples with a single run have nothing to merge
        if len(runs) <= 1:
            continue
        first_run = runs.keys()[0]
        run_config = runs[first_run]
        sample_root = os.path.dirname(os.path.dirname(run_config))
        out_d = os.path.join(sample_root, MERGED_SAMPLE_OUTPUT_DIR)
        LOG.info("Sample {} has {} sample runs; setting up merge analysis in {}".format(k, len(v), out_d) if False else "Sample {} has {} sample runs; setting up merge analysis in {}".format(sample, len(runs), out_d))
        # The output directory is requested with dry_run=False, independent of the dry_run option
        dry_makedir(out_d, dry_run=False)

        # Post-process configuration: explicit option wins, otherwise derive it from the run config name
        pp = kw.get("post_process") or run_config.replace("-bcbb-config.yaml", "-post_process.yaml")
        with open(pp) as fh:
            # NOTE(review): yaml.load can construct arbitrary objects; confirm the file is trusted
            conf = yaml.load(fh)
        conf = update_pp_platform_args(
            conf,
            jobname="{}_total".format(sample),
            workdir=out_d,
            output="{}_total-bcbb.log".format(sample),
        )
        pp_new = os.path.join(out_d, os.path.basename(pp))
        dry_unlink(pp_new, dry_run=is_dry)
        dry_write(pp_new, yaml.safe_dump(conf, **dump_opts), dry_run=is_dry)

        # Merged bcbb-config file, named after the first run's config file
        merged = merge_sample_config(runs.values(), sample=sample, out_d=out_d, dry_run=is_dry)
        merged_file = os.path.join(out_d, os.path.basename(runs.values()[0]))
        merged = sort_sample_config_fastq(merged)
        must_write = not os.path.exists(merged_file) or kw.get('new_config', False)
        if must_write:
            dry_unlink(merged_file, dry_run=is_dry)
            dry_write(merged_file, yaml.safe_dump(merged, **dump_opts), dry_run=is_dry)
        merged_configs.append(merged_file)
    return merged_configs
示例#2
0
def setup_merged_samples(flist, sample_group_fn=_group_samples, **kw):
    """Setup analysis that merges multiple sample runs.

    For every sample with at least one sample run, an updated post_process
    file and a merged bcbb-config file are set up in
    MERGED_SAMPLE_OUTPUT_DIR, located two directory levels above the config
    file of the sample's first run.

    :param flist: list of file names, by default *-bcbb-config.yaml files
    :param sample_group_fn: function that groups files into samples and sample runs. The function takes flist as input.
    :param kw: optional keyword arguments read here:
        ``post_process`` - post_process file to use; when missing, None or
        empty, the post_process file next to the first run config is used;
        ``dry_run`` - forwarded to the dry_* helpers and merge_sample_config (default True);
        ``new_config`` - rewrite the merged bcbb-config file even if it exists (default False)

    :returns: list with the merged bcbb-config file of each sample
    """
    dry_run = kw.get('dry_run', True)
    new_flist = []
    sample_d = sample_group_fn(flist)
    for k, v in sample_d.items():
        if not v:
            continue
        # list() keeps the indexing below valid on both Python 2 and Python 3
        run_configs = list(v.values())
        f = run_configs[0]
        out_d = os.path.join(os.path.dirname(os.path.dirname(f)), MERGED_SAMPLE_OUTPUT_DIR)
        LOG.info("Sample {} has {} sample runs; setting up merge analysis in {}".format(k, len(v), out_d))
        # The output directory is requested with dry_run=False, independent of the dry_run option
        dry_makedir(out_d, dry_run=False)
        # Use `or` rather than a .get() default: a caller passing post_process=None
        # must still fall back to the run's own post_process file instead of open(None).
        pp = kw.get("post_process") or f.replace("-bcbb-config.yaml", "-post_process.yaml")
        with open(pp) as fh:
            # NOTE(review): yaml.load without an explicit Loader can construct arbitrary
            # Python objects and is rejected by PyYAML >= 6; consider yaml.safe_load
            # if this file only ever holds plain data.
            conf = yaml.load(fh)
        conf = update_pp_platform_args(conf, **{'jobname': "{}_total".format(k), 'workdir': out_d, 'output': "{}_total-bcbb.log".format(k)})
        pp_new = os.path.join(out_d, os.path.basename(pp))
        dry_unlink(pp_new, dry_run=dry_run)
        dry_write(pp_new, yaml.safe_dump(conf, default_flow_style=False, allow_unicode=True, width=1000), dry_run=dry_run)
        # Setup merged bcbb-config file, named after the first run's config file
        bcbb_config_file = os.path.join(out_d, os.path.basename(f))
        bcbb_config = merge_sample_config(run_configs, sample=k, out_d=out_d, dry_run=dry_run)
        bcbb_config = sort_sample_config_fastq(bcbb_config, path=out_d)
        if not os.path.exists(bcbb_config_file) or kw.get('new_config', False):
            dry_unlink(bcbb_config_file, dry_run=dry_run)
            dry_write(bcbb_config_file, yaml.safe_dump(bcbb_config, default_flow_style=False, allow_unicode=True, width=1000), dry_run=dry_run)
        new_flist.append(bcbb_config_file)
    return new_flist
示例#3
0
 def test_merge_sample_config(self):
     """Merging a sample's run configs must leave the run fastq files in TOTAL.

     Groups the samples found under j_doe_00_05, calls merge_sample_config
     for sample P001_101_index3 with dry_run=False and asserts that the two
     expected fastq.gz files exist in the sample's TOTAL directory.
     """
     sample = "P001_101_index3"
     grouped = _group_samples(find_samples(j_doe_00_05))
     out_d = os.path.join(j_doe_00_05, sample, "TOTAL")
     if not os.path.exists(out_d):
         os.makedirs(out_d)
     merge_sample_config(grouped[sample].values(), sample, out_d=out_d, dry_run=False)
     expected_files = (
         "P001_101_index3_B002BBBXX_TGACCA_L001_R1_001.fastq.gz",
         "P001_101_index3_C003CCCXX_TGACCA_L001_R1_001.fastq.gz",
     )
     for fastq in expected_files:
         self.assertTrue(os.path.exists(os.path.join(out_d, fastq)))
示例#4
0
 def test_merge_sample_config(self):
     """Check the files present after merging the configs of one sample.

     The run configs of sample P001_101_index3 (found under j_doe_00_05)
     are merged into its TOTAL directory with dry_run=False; afterwards both
     expected fastq.gz files must exist in that directory.
     """
     sample = "P001_101_index3"
     flist = find_samples(j_doe_00_05)
     fdict = _group_samples(flist)
     out_d = os.path.join(j_doe_00_05, sample, "TOTAL")
     if not os.path.exists(out_d):
         os.makedirs(out_d)
     run_configs = fdict[sample].values()
     merge_sample_config(run_configs, sample, out_d=out_d, dry_run=False)

     first_fastq = os.path.join(
         out_d, "P001_101_index3_B002BBBXX_TGACCA_L001_R1_001.fastq.gz")
     second_fastq = os.path.join(
         out_d, "P001_101_index3_C003CCCXX_TGACCA_L001_R1_001.fastq.gz")
     self.assertTrue(os.path.exists(first_fastq))
     self.assertTrue(os.path.exists(second_fastq))