def stages(commands: Sequence[Sequence[Union[str, Callable]]],
           subdirs: Sequence[str] = None,
           keep_data: bool = False) -> str:
    """
    Execute a list of commands in a temporary directory, cleaning it up unless otherwise
    requested.

    Parameters
    ----------
    commands : Sequence[Sequence[Union[str, Callable]]]
        A collection of tuples of commands composed either of str elements or callable objects
        which will be invoked with the keyword argument "tempdir".  The resulting list of
        strings will be passed to subprocess.check_call.
    subdirs : Sequence[str]
        A collection of paths which should be created as subdirectories within the temporary
        directory used by this invocation.  Entries may overlap or repeat (e.g. "a/b" and "a");
        directories that already exist are left untouched.
    keep_data : bool
        If not true, the temporary directory used by this invocation is removed before this
        function returns, whether or not the commands succeeded.

    Returns
    -------
    str :
        Path to the temporary directory used by this invocation.  When `keep_data` is false the
        directory has already been deleted by the time the caller receives this path.

    Raises
    ------
    subprocess.CalledProcessError
        If a command exits with a non-zero status.  The remaining commands are not run.

    Environment variables
    ---------------------
    STARFISH_COVERAGE :
        If set, then command lists will have `coverage run ...` options prepended before
        execution.
    """
    if keep_data:
        # The caller owns the directory; nothing is registered for cleanup.
        tempobj = None
        tempdir = tempfile.mkdtemp()
    else:
        tempobj = tempfile.TemporaryDirectory()
        tempdir = tempobj.name

    def callback(interval):
        # `stage` is looked up when the timer fires, so it names the stage the loop below is
        # currently running.  NOTE(review): assumes the first two elements of a stage are
        # strings (str.join rejects callables) -- confirm against callers.
        print(" ".join(stage[:2]), " ==> {} seconds".format(interval))

    try:
        for subdir in subdirs or ():
            # exist_ok: an earlier entry may already have created this directory as a parent.
            os.makedirs(os.path.join(tempdir, subdir), exist_ok=True)

        for stage in commands:
            cmdline = prepare_stage(stage, tempdir)
            with clock.timeit(callback):
                subprocess.check_call(cmdline)
        return tempdir
    finally:
        if tempobj is not None:
            tempobj.cleanup()
def test_run_pipeline(self):
    """Run every ISS stage in a scratch directory and check the two most frequent barcodes.

    Each stage in ``TestWithIssData.STAGES`` is turned into an argument list (callable elements
    are invoked with ``tempdir=<scratch dir>``) and executed with ``subprocess.check_call``.
    When ``STARFISH_COVERAGE`` is present in the environment, stages whose first argument is
    ``starfish`` are run through ``coverage run``.  The scratch directory is removed afterwards
    unless ``TEST_ISS_KEEP_DATA`` is set.
    """
    workdir = tempfile.mkdtemp()
    use_coverage = "STARFISH_COVERAGE" in os.environ

    def report(elapsed):
        # `step` is read from the loop below at the moment the timer fires.
        print(" ".join(step[:2]), " ==> {} seconds".format(elapsed))

    def resolve(part):
        # Callable stage elements produce their argument from the scratch directory.
        return part(tempdir=workdir) if callable(part) else part

    try:
        for name in TestWithIssData.SUBDIRS:
            os.makedirs(os.path.join(workdir, name))

        for step in TestWithIssData.STAGES:
            argv = [resolve(part) for part in step]
            if argv[0] == "starfish" and use_coverage:
                coverage_prefix = [
                    "coverage", "run",
                    "-p",
                    "--source", "starfish",
                    "-m", "starfish",
                ]
                argv = coverage_prefix + argv[1:]
            with clock.timeit(report):
                subprocess.check_call(argv)

        decoded_path = os.path.join(workdir, "results", "decoded_table.json")
        with open(decoded_path) as fh:
            records = json.load(fh)

        # Rank barcodes by occurrence count, ties broken by barcode, both descending.
        tally = collections.Counter(record["barcode"] for record in records)
        ranked = sorted(
            ((occurrences, barcode) for barcode, occurrences in tally.items()),
            reverse=True,
        )
        self.assertEqual("AAGC", ranked[0][1])
        self.assertEqual("AGGC", ranked[1][1])
    finally:
        keep_data = os.getenv("TEST_ISS_KEEP_DATA") is not None
        if not keep_data:
            shutil.rmtree(workdir)
def test_run_pipeline(self):
    """Run every ISS stage in a scratch directory and check the two most frequent barcodes.

    Each stage in ``TestWithIssData.STAGES`` is turned into an argument list (callable elements
    are invoked with ``tempdir=<scratch dir>``) and executed with ``subprocess.check_call``.
    Stages whose first argument is ``starfish`` are always run through ``coverage run``.
    The barcode is taken from the first comma-separated column of
    ``results/decoder_table.csv``, skipping the first line.  The scratch directory is always
    removed afterwards.
    """
    workdir = tempfile.mkdtemp()

    def report(elapsed):
        # `step` is read from the loop below at the moment the timer fires.
        print(" ".join(step[:2]), " ==> {} seconds".format(elapsed))

    def resolve(part):
        # Callable stage elements produce their argument from the scratch directory.
        return part(tempdir=workdir) if callable(part) else part

    try:
        for name in TestWithIssData.SUBDIRS:
            os.makedirs(os.path.join(workdir, name))

        for step in TestWithIssData.STAGES:
            argv = [resolve(part) for part in step]
            if argv[0] == "starfish":
                coverage_prefix = [
                    "coverage", "run",
                    "-p",
                    "--source", "starfish",
                    "-m", "starfish",
                ]
                argv = coverage_prefix + argv[1:]
            with clock.timeit(report):
                subprocess.check_call(argv)

        table_path = os.path.join(workdir, "results", "decoder_table.csv")
        with open(table_path) as fh:
            # Drop the first line of the table before counting.
            rows = fh.readlines()[1:]

        # Rank barcodes by occurrence count, ties broken by barcode, both descending.
        tally = collections.Counter(row.split(",")[0] for row in rows)
        ranked = sorted(
            ((occurrences, barcode) for barcode, occurrences in tally.items()),
            reverse=True,
        )
        self.assertEqual("AAGC", ranked[0][1])
        self.assertEqual("AGGC", ranked[1][1])
    finally:
        shutil.rmtree(workdir)
def test_run_pipeline(self):
    """Run every ISS stage in a scratch directory and compare two gene counts.

    Each stage in ``TestWithIssData.STAGES`` is turned into an argument list (callable elements
    are invoked with ``tempdir=<scratch dir>``) and executed with ``subprocess.check_call``.
    When ``STARFISH_COVERAGE`` is present in the environment, stages whose first argument is
    ``starfish`` are run through ``coverage run``.  The test then loads ``results/spots.nc``
    and asserts that ``ACTB_human`` occurs more often than ``ACTB_mouse`` in the gene
    coordinate.  The scratch directory is removed afterwards unless ``TEST_ISS_KEEP_DATA`` is
    set.
    """
    workdir = tempfile.mkdtemp()
    use_coverage = "STARFISH_COVERAGE" in os.environ

    def report(elapsed):
        # `step` is read from the loop below at the moment the timer fires.
        print(" ".join(step[:2]), " ==> {} seconds".format(elapsed))

    def resolve(part):
        # Callable stage elements produce their argument from the scratch directory.
        return part(tempdir=workdir) if callable(part) else part

    try:
        for name in TestWithIssData.SUBDIRS:
            os.makedirs(os.path.join(workdir, name))

        for step in TestWithIssData.STAGES:
            argv = [resolve(part) for part in step]
            if argv[0] == "starfish" and use_coverage:
                coverage_prefix = [
                    "coverage", "run",
                    "-p",
                    "--source", "starfish",
                    "-m", "starfish",
                ]
                argv = coverage_prefix + argv[1:]
            with clock.timeit(report):
                subprocess.check_call(argv)

        spots_path = os.path.join(workdir, "results", "spots.nc")
        intensities = IntensityTable.load(spots_path)

        # Tally how many entries carry each distinct value of the gene coordinate.
        gene_coord = intensities.coords[Codebook.Constants.GENE.value]
        genes, counts = np.unique(gene_coord, return_counts=True)
        gene_counts = pd.Series(data=counts, index=genes)
        assert gene_counts['ACTB_human'] > gene_counts['ACTB_mouse']
    finally:
        keep_data = os.getenv("TEST_ISS_KEEP_DATA") is not None
        if not keep_data:
            shutil.rmtree(workdir)