def test_prune_pp_platform_args(self):
    """Check that pruning ``platform_args`` keeps ``-t`` and drops ``-A``.

    Loads ``<SAMPLE>-post_process.yaml`` from the test project tree, prunes
    it with ``keep_opts=["-t"]`` and inspects the whitespace-separated
    tokens of the resulting ``distributed.platform_args`` string.
    """
    ppfile = os.path.join(j_doe_00_01, SAMPLE, FLOWCELL,
                          "{}-post_process.yaml".format(SAMPLE))
    with open(ppfile, "r") as fh:
        # NOTE: deliberately changed from bare ``yaml.load(fh)``; calling it
        # without a Loader can construct arbitrary objects and is rejected
        # outright by PyYAML >= 6. The fixture is a plain config mapping.
        conf = yaml.safe_load(fh)
    newconf = prune_pp_platform_args(conf, keep_opts=["-t"])
    # Tokenise once and reuse for both assertions.
    platform_args = newconf['distributed']['platform_args'].split()
    self.assertIn("-t", platform_args)
    self.assertNotIn("-A", platform_args)
def _to_casava_structure(self, fc):
    """Transfer every sample of a flowcell into its own output directory.

    The destination prefix is ``<project root>/<name>/data`` where ``<name>``
    is ``self.pargs.transfer_dir`` when set and ``self.pargs.project``
    otherwise. For each sample the method:

    * creates ``<prefix>/<sample name>/<flowcell id>``,
    * hands the (pruned) sequence files and the results to
      ``self._transfer_files``,
    * writes ``<sample name>-bcbb-pm-config.yaml`` describing the sample
      subset of the flowcell, and
    * rewrites the ``*-post_process.yaml`` files found below the sample
      directory with pruned platform arguments.

    A per-sample summary (number of files and results) is finally written
    to ``self.app._output_data["stderr"]``.

    :param fc: flowcell object. Iterating it yields samples indexable by
        ``"lane"``, ``"sequence"``, ``"name"``, ``"files"`` and
        ``"results"``; it must also provide ``fc_id()``, ``subset()`` and
        ``path``.
    """
    transfer_status = {}
    project_root = self.app.config.get("project", "root")
    outdir_pfx = os.path.abspath(os.path.join(project_root, self.pargs.project, "data"))
    if self.pargs.transfer_dir:
        outdir_pfx = os.path.abspath(os.path.join(project_root, self.pargs.transfer_dir, "data"))

    # Loop-invariant: matches the literal file-name suffix. The dot is
    # escaped so it no longer matches an arbitrary character.
    pp_pattern = re.compile(r"-post_process\.yaml$")

    def pp_yaml_filter(f):
        return pp_pattern.search(f) is not None

    for sample in fc:
        key = "{}_{}".format(sample["lane"], sample["sequence"])
        sources = {
            "files": self._prune_sequence_files(sample["files"]),
            "results": sample["results"],
        }
        outdir = os.path.join(outdir_pfx, sample["name"], fc.fc_id())
        # Data and intermediate output currently share one directory.
        dirs = {
            "data": os.path.abspath(outdir),
            "intermediate": os.path.abspath(outdir),
        }
        self._make_output_dirs(dirs)
        fc_new = fc.subset("lane", sample["lane"]).subset("name", sample["name"])
        targets = {
            "files": [src.replace(fc.path, dirs["data"]) for src in sources["files"]],
            "results": [src.replace(fc.path, dirs["intermediate"]) for src in sources["results"]],
        }
        # Point the lane files of the subset at the new output directory.
        fc_new.lane_files = {
            k: [os.path.join(outdir, os.path.basename(x)) for x in v]
            for k, v in fc_new.lane_files.items()
        }
        fc_new.set_entry(key, "files", targets["files"])
        fc_new.set_entry(key, "results", targets["results"])
        ## Copy sample files - currently not doing lane files
        self._transfer_files(sources, targets)
        self.app.cmd.write(
            os.path.join(dirs["data"], "{}-bcbb-pm-config.yaml".format(sample["name"])),
            fc_new.as_yaml(),
        )
        transfer_status[sample["name"]] = {
            "files": len(sources["files"]),
            "results": len(sources["results"]),
        }

        ## Rewrite platform_args; only keep time, workdir, account, partition, outpath and jobname
        # Done per sample, inside the loop, so that ``dirs`` is always bound
        # (also for an empty flowcell) and every sample directory is handled.
        for pp in filtered_walk(dirs["data"], pp_yaml_filter):
            self.app.log.debug("Rewriting platform args for {}".format(pp))
            with open(pp, "r") as fh:
                # NOTE: deliberately changed from bare ``yaml.load(fh)``,
                # which is unsafe without a Loader and rejected by
                # PyYAML >= 6. The result is written back with
                # ``yaml.safe_dump`` below, so only plain YAML types can
                # round-trip here anyway.
                conf = yaml.safe_load(fh)
            if not conf:
                self.app.log.warn("No configuration for {}".format(pp))
                continue
            newconf = prune_pp_platform_args(conf)
            if newconf == conf:
                # Nothing was pruned; leave the file untouched.
                continue
            self.app.cmd.safe_unlink(pp)
            self.app.cmd.write(
                pp, yaml.safe_dump(newconf, default_flow_style=False, allow_unicode=True, width=1000)
            )

    # Write transfer summary
    stderr = self.app._output_data["stderr"]
    stderr.write("Transfer summary\n")
    stderr.write("{:<18}{:>18}{:>18}\n".format("Sample", "Transferred files", "Results"))
    # ``items()`` works on both Python 2 and 3 (``iteritems`` is Python 2 only).
    for name, counts in transfer_status.items():
        stderr.write("{:<18}{:>18}{:>18}\n".format(name, counts["files"], counts["results"]))
def _to_casava_structure(self, fc):
    """Lay out the samples of flowcell ``fc`` in per-sample directories.

    Output goes below ``<project root>/<project>/data``, or below
    ``<project root>/<transfer_dir>/data`` when ``self.pargs.transfer_dir``
    is set. Each sample gets the directory ``<sample name>/<flowcell id>``,
    into which its pruned sequence files and its results are transferred
    and a ``<sample name>-bcbb-pm-config.yaml`` is written. Any
    ``*-post_process.yaml`` found in that directory is rewritten with pruned
    platform arguments. A transfer summary is written to the application's
    ``stderr`` output buffer at the end.

    :param fc: flowcell object; iteration yields samples indexable by
        ``'lane'``, ``'sequence'``, ``'name'``, ``'files'`` and
        ``'results'``. ``fc_id()``, ``subset()`` and ``path`` are used too.
    """
    transfer_status = {}
    root = self.app.config.get("project", "root")
    outdir_pfx = os.path.abspath(
        os.path.join(root, self.pargs.project, "data"))
    if self.pargs.transfer_dir:
        outdir_pfx = os.path.abspath(
            os.path.join(root, self.pargs.transfer_dir, "data"))

    # Compiled once; the dot is escaped so only the literal suffix matches.
    pattern = re.compile(r"-post_process\.yaml$")

    def pp_yaml_filter(f):
        return pattern.search(f) is not None

    for sample in fc:
        key = "{}_{}".format(sample['lane'], sample['sequence'])
        sources = {
            "files": self._prune_sequence_files(sample['files']),
            "results": sample['results']
        }
        outdir = os.path.join(outdir_pfx, sample['name'], fc.fc_id())
        # "data" and "intermediate" resolve to the same directory for now.
        dirs = {
            "data": os.path.abspath(outdir),
            "intermediate": os.path.abspath(outdir)
        }
        self._make_output_dirs(dirs)
        fc_new = fc.subset("lane", sample['lane']).subset("name", sample['name'])
        targets = {
            "files": [
                src.replace(fc.path, dirs["data"]) for src in sources['files']
            ],
            "results": [
                src.replace(fc.path, dirs["intermediate"])
                for src in sources['results']
            ]
        }
        # Relocate the lane files of the subset to the output directory.
        fc_new.lane_files = dict(
            (k, [os.path.join(outdir, os.path.basename(x)) for x in v])
            for k, v in fc_new.lane_files.items())
        fc_new.set_entry(key, 'files', targets['files'])
        fc_new.set_entry(key, 'results', targets['results'])
        ## Copy sample files - currently not doing lane files
        self._transfer_files(sources, targets)
        self.app.cmd.write(
            os.path.join(dirs["data"],
                         "{}-bcbb-pm-config.yaml".format(sample['name'])),
            fc_new.as_yaml())
        transfer_status[sample['name']] = {
            'files': len(sources['files']),
            'results': len(sources['results'])
        }

        ## Rewrite platform_args; only keep time, workdir, account, partition, outpath and jobname
        # Kept inside the sample loop: ``dirs`` is then always defined
        # (an empty flowcell simply skips this) and no sample is missed.
        ppfiles = filtered_walk(dirs["data"], pp_yaml_filter)
        for pp in ppfiles:
            self.app.log.debug("Rewriting platform args for {}".format(pp))
            with open(pp, "r") as fh:
                # NOTE: switched on purpose from ``yaml.load(fh)``, which
                # has no Loader (unsafe; a TypeError on PyYAML >= 6).
                # ``safe_load`` mirrors the ``safe_dump`` used for writing.
                conf = yaml.safe_load(fh)
            if not conf:
                self.app.log.warn("No configuration for {}".format(pp))
                continue
            newconf = prune_pp_platform_args(conf)
            if newconf == conf:
                # Pruning changed nothing, so the file is left as is.
                continue
            self.app.cmd.safe_unlink(pp)
            self.app.cmd.write(
                pp,
                yaml.safe_dump(newconf,
                               default_flow_style=False,
                               allow_unicode=True,
                               width=1000))

    # Write transfer summary
    self.app._output_data["stderr"].write("Transfer summary\n")
    self.app._output_data["stderr"].write("{:<18}{:>18}{:>18}\n".format(
        "Sample", "Transferred files", "Results"))
    # ``iteritems`` does not exist on Python 3; ``items`` works everywhere.
    for k, v in transfer_status.items():
        self.app._output_data["stderr"].write(
            "{:<18}{:>18}{:>18}\n".format(k, v['files'], v['results']))