import os
import shutil
import tarfile

# Assumes `fileutils` (paleomix.common.fileutils) and the print_err /
# print_info console helpers are available in the surrounding module.


def setup_example(config):
    root = os.path.join(config.destination, 'zonkey_pipeline')

    # tarfile.open auto-detects compression, unlike the TarFile constructor
    with tarfile.open(config.tablefile) as tar_handle:
        example_files = []
        existing_files = []
        for member in tar_handle.getmembers():
            if os.path.dirname(member.name) == 'examples' and member.isfile():
                example_files.append(member)

                destination = fileutils.reroot_path(root, member.name)
                if os.path.exists(destination):
                    existing_files.append(destination)

        if existing_files:
            print_err("Output files already exist at destination:\n - %s"
                      % ("\n - ".join(map(repr, existing_files))))
            return 1
        elif not example_files:
            print_err("Sample database %r does not contain example data; "
                      "cannot proceed." % (config.tablefile,))
            return 1

        if not os.path.exists(root):
            fileutils.make_dirs(root)

        for member in example_files:
            destination = fileutils.reroot_path(root, member.name)
            src_handle = tar_handle.extractfile(member)
            # Copy in binary mode to avoid corrupting non-text files
            with open(destination, 'wb') as out_handle:
                shutil.copyfileobj(src_handle, out_handle)

    print_info("Successfully saved example data in %r" % (root,))

    return 0
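# `reroot_path` above is provided by paleomix.common.fileutils; as used
# here it re-anchors a file's basename under a new directory. A minimal
# sketch of the assumed behaviour:
import os


def reroot_path(root, filename):
    # e.g. reroot_path("out", "examples/data.txt") -> "out/data.txt"
    return os.path.join(root, os.path.basename(filename))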
def finalize(self):
    """Called by the pipeline at the termination of a run. By default,
    this function prints the location of the log-file if one was created
    during the run (e.g. if there were errors), and a summary of all
    nodes.
    """
    runtime = (self._end_time or 0) - (self._start_time or 0)

    if self.states[self.ERROR]:
        print_err("Done; but errors were detected ...")
    else:
        print_info("Done ...")

    print_info()
    rows = [("  Number of nodes:", sum(self.states)),
            ("  Number of done nodes:", self.states[self.DONE]),
            ("  Number of runnable nodes:", self.states[self.RUNABLE]),
            ("  Number of queued nodes:", self.states[self.QUEUED]),
            ("  Number of outdated nodes:", self.states[self.OUTDATED]),
            ("  Number of failed nodes:", self.states[self.ERROR]),
            ("  Pipeline runtime:", _fmt_runtime(round(runtime)))]

    for line in text.padded_table(rows):
        print_info(line)

    print_info("\nUse --list-output-files to view status of output files.")

    logfile = paleomix.logger.get_logfile()
    if logfile:
        print_debug("Log-file located at %r" % (logfile,))

    print_info()
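# `_fmt_runtime` is not shown in this excerpt; a plausible sketch, assuming
# it renders a number of seconds as hours, minutes and seconds:
def _fmt_runtime(runtime):
    # Hypothetical helper; the actual formatting in the pipeline may differ.
    hours, rest = divmod(int(runtime), 3600)
    minutes, seconds = divmod(rest, 60)
    return "%i:%02i:%02is" % (hours, minutes, seconds)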
def main(argv):
    try:
        config = zonkey_config.parse_config(argv)
        if config is None:
            return 1
    except zonkey_config.ConfigError as error:
        print_err(error)
        return 1
def read_sample_sheets(filenames):
    records = {}
    for root in filenames:
        if os.path.isdir(root):
            filename = os.path.join(root, _FILENAME)
        else:
            root, filename = os.path.split(root)[0], root

        if not os.path.exists(filename):
            print_err("ERROR: Could not find SampleSheet file: %r" % filename)
            return None

        sample_sheet = read_alignment_records(filename)
        if sample_sheet is None:
            return None

        for record in sample_sheet:
            record["Lane"] = int(record["Lane"])
            path = "%(SampleID)s_%(Index)s_L%(Lane)03i_R{Pair}_*.fastq.gz" \
                % record
            record["Path"] = select_path(os.path.join(root, path))
            key = "%(FCID)s_%(Lane)s" % record

            libraries = records.setdefault(record["SampleID"], {})
            barcodes = libraries.setdefault(record["Index"], {})
            # Store the selected path, so duplicate lanes can be renamed
            barcodes.setdefault(key, []).append(record["Path"])

    # Clean up names; generate unique names for duplicate lanes
    for libraries in records.itervalues():
        for barcodes in libraries.itervalues():
            for key, paths in barcodes.items():
                if len(paths) == 1:
                    barcodes[key] = paths[0]
                    continue

                counter = 1
                for path in paths:
                    new_key = "%s_%i" % (key, counter)
                    while new_key in barcodes:
                        counter += 1
                        new_key = "%s_%i" % (key, counter)

                    barcodes[new_key] = path

                barcodes.pop(key)

    return records
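# `select_path` is not included in this excerpt; a hedged sketch, assuming
# it resolves the "{Pair}" placeholder for lanes that turn out to be
# single-ended (only R1 files on disk):
import glob


def select_path(path):
    # Hypothetical behaviour: keep the {Pair} template for paired data,
    # but collapse it to the R1 path when no R2 files match.
    if glob.glob(path.format(Pair=1)) and not glob.glob(path.format(Pair=2)):
        return path.format(Pair=1)
    return path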
from __future__ import print_function  # needed for print() under Python 2


def main(argv, pipeline="bam"):
    assert pipeline in ("bam", "trim"), pipeline

    options, paths = parse_args(argv)
    records = {}
    for root in paths:
        if os.path.isdir(root):
            filename = os.path.join(root, _FILENAME)
        else:
            root, filename = os.path.split(root)[0], root

        if not os.path.exists(filename):
            print_err("ERROR: Could not find SampleSheet file: %r" % filename)
            return 1

        sample_sheet = read_alignment_records(filename)
        if sample_sheet is None:
            # read_alignment_records prints its own error message
            return 1

        for record in sample_sheet:
            libraries = records.setdefault(record["SampleID"], {})
            barcodes = libraries.setdefault(record["Index"], [])

            record["Lane"] = int(record["Lane"])
            path = "%(SampleID)s_%(Index)s_L%(Lane)03i_R{Pair}_*.fastq.gz" \
                % record
            record["Path"] = select_path(os.path.join(root, path))
            barcodes.append(record)

    template = build_makefile(add_full_options=(pipeline == "bam"),
                              add_prefix_tmpl=(pipeline == "bam"))
    if options.minimal:
        template = strip_comments(template)
    print(template)

    for (sample, libraries) in records.iteritems():
        print("%s:" % sample)
        print("  %s:" % sample)
        for (library, barcodes) in libraries.iteritems():
            print("    %s:" % library)
            for record in barcodes:
                print("      {FCID}_{Lane}: {Path}".format(**record))
            print()
        print()

    if argv:
        print_info("Automatically generated makefile printed.\n"
                   "Please check for correctness before running pipeline.")

    return 0
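# For illustration, the loop above emits target/sample/library sections of
# the following shape (all names hypothetical):
#
#   Sample1:
#     Sample1:
#       ACGTAT:
#         C01FNACXX_1: reads/Sample1_ACGTAT_L001_R{Pair}_*.fastq.gz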
def read_alignment_records(filename):
    results = []
    with open(filename) as records:
        line = records.readline()
        if not line:
            print_err("ERROR: Empty SampleSheet.csv file: %r" % (filename,))
            return None

        header = line.strip().split(",")
        missing = set(("SampleID", "Index", "Lane", "FCID")) - set(header)
        if missing:
            print_err("ERROR: Required columns missing from SampleSheet file "
                      "%r: %s" % (filename, ", ".join(map(repr, missing))))
            return None

        for idx, line in enumerate(records, start=2):
            line = line.strip()
            if not line:
                continue

            fields = line.split(",")
            if len(fields) != len(header):
                print_err("Line %i in SampleSheet file %r does not contain "
                          "the expected number of columns; expected %i, but "
                          "found %i." % (idx, filename, len(header),
                                         len(fields)))
                return None

            results.append(dict(zip(header, fields)))

    return results
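# A minimal usage sketch with a hypothetical sample sheet; column order is
# free, since the header line is parsed first:
#
#   FCID,Lane,SampleID,Index
#   C01FNACXX,1,Sample1,ACGTAT
#   C01FNACXX,2,Sample1,ACGTAT
records = read_alignment_records("SampleSheet.csv")
if records is not None:
    for record in records:
        print("%(SampleID)s %(FCID)s %(Lane)s" % record)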
def main(argv, pipeline="bam"):
    assert pipeline in ("bam", "trim"), pipeline

    commands = ("makefile", "mkfile", "run",
                "dry_run", "dry-run", "dryrun",
                "remap", "example", "examples")

    if not argv or (argv[0] == "help"):
        _print_usage(pipeline)
        return 0
    elif argv[0] not in commands:
        _print_usage(pipeline)
        return 1
    elif argv[0] in ("mkfile", "makefile"):
        return bam_mkfile.main(argv[1:], pipeline=pipeline)
    elif argv[0] in ("remap", "remap_prefix"):
        # Import here to avoid circular dependency issues
        import paleomix.tools.bam_pipeline.remap as bam_remap

        return bam_remap.main(argv[1:])
    elif argv[0] in ("example", "examples"):
        return paleomix.resources.copy_example("bam_pipeline", argv[1:])

    try:
        config, args = bam_config.parse_config(argv, pipeline)

        if not args[1:]:
            print_err("Please specify at least one makefile!")
            print_err("Use --help for more information.")
            return 1
        elif args and args[0].startswith("dry"):
            config.dry_run = True
    except bam_config.ConfigError as error:
        print_err(error)
        return 1
def main(argv):
    try:
        config, args = parse_config(argv)
    except ConfigError as error:
        print_err(error)
        return 1
def run(config, args, pipeline_variant):
    if pipeline_variant not in ("bam", "trim"):
        raise ValueError("Unexpected BAM pipeline variant (%r)"
                         % (pipeline_variant,))

    if not os.path.exists(config.temp_root):
        try:
            os.makedirs(config.temp_root)
        except OSError as error:
            print_err("ERROR: Could not create temp root:\n\t%s" % (error,))
            return 1

    if not os.access(config.temp_root, os.R_OK | os.W_OK | os.X_OK):
        print_err("ERROR: Insufficient permissions for temp root: '%s'"
                  % (config.temp_root,))
        return 1

    # Init worker-threads before reading in any more data
    pipeline = Pypeline(config)

    try:
        print_info("Reading makefiles ...")
        makefiles = read_makefiles(config, args, pipeline_variant)
    except (MakefileError, paleomix.yaml.YAMLError, IOError) as error:
        print_err("Error reading makefiles:",
                  "\n  %s:\n   " % (error.__class__.__name__,),
                  "\n  ".join(str(error).split("\n")))
        return 1

    logfile_template = time.strftime("bam_pipeline.%Y%m%d_%H%M%S_%%02i.log")
        # Update interpreter to match the one currently in use;
        # this is required since we may be running from a virtual env
        filename = os.path.join(argv[1], 'phylo_pipeline',
                                'synthesize_reads.py')

        with open(filename) as handle:
            header, lines = handle.read().split('\n', 1)

        with open(filename, 'w') as handle:
            handle.write('#!%s\n' % (os.path.abspath(sys.executable),))
            handle.write(lines)

        return 0
    elif (len(args) < 2) and ("mkfile" not in args and "makefile" not in args):
        print_err("\nPlease specify at least one makefile!")
        return 1

    commands = select_commands(args.pop(0))
    if any((cmd in ("makefile", "mkfile")) for (cmd, _) in commands):
        return mkfile.main(args[1:])

    if not os.path.exists(config.temp_root):
        try:
            os.makedirs(config.temp_root)
        except OSError as error:
            print_err("ERROR: Could not create temp root:\n\t%s" % (error,))
            return 1

    if not os.access(config.temp_root, os.R_OK | os.W_OK | os.X_OK):
        print_err("ERROR: Insufficient permissions for temp root: '%s'"
                  % (config.temp_root,))
        return 1
def setup_mito_mapping(config):
    genomes_root = os.path.join(config.destination, "genomes")
    if not os.path.exists(genomes_root):
        fileutils.make_dirs(genomes_root)

    mkfile_fpath = os.path.join(config.destination, "makefile.yaml")

    filenames = [mkfile_fpath]
    for name, record in sorted(config.database.mitochondria.iteritems()):
        filenames.append(os.path.join(genomes_root,
                                      "%s.fasta" % (record.name,)))

    existing_filenames = [filename for filename in filenames
                          if os.path.exists(filename)]

    # A bit strict, but avoids accidental overwrites
    if existing_filenames:
        print_err("ERROR: Output file(s) already exist, "
                  "cannot proceed:\n    %s"
                  % ("\n    ".join(map(repr, existing_filenames))))
        return 1

    with open(mkfile_fpath, "w") as mkfile:
        mkfile.write(bam_mkfile.build_makefile(add_prefix_tmpl=False,
                                               add_sample_tmpl=False))

        mkfile.write("\n\nPrefixes:\n")

        for name, record in sorted(config.database.mitochondria.iteritems()):
            meta = (record.meta or "").upper()
            if "EXCLUDE" in meta:
                continue

            mkfile.write("  %s:\n" % (record.name,))
            mkfile.write("    Path: genomes/%s.fasta\n" % (record.name,))

            info = config.database.samples.get(record.name)
            if info is not None:
                mkfile.write("    # Group: %s\n"
                             % (info.get('Group(3)', 'NA'),))
                mkfile.write("    # Species: %s\n"
                             % (info.get('Species', 'NA'),))
                mkfile.write("    # Sex: %s\n" % (info.get('Sex', 'NA'),))
                mkfile.write("    # Publication: %s\n"
                             % (info.get('Publication', 'NA'),))
                mkfile.write("    # Sample ID: %s\n"
                             % (info.get('SampleID', 'NA'),))

            mkfile.write('\n')

            fasta_fpath = os.path.join(genomes_root,
                                       "%s.fasta" % (record.name,))

            with open(fasta_fpath, "w") as fasta_handle:
                # Strip alignment gaps before writing the reference sequence
                record = FASTA(name=record.name,
                               meta=None,
                               sequence=record.sequence.replace('-', ''))
                fasta_handle.write(str(record))
                fasta_handle.write("\n")

        mkfile.write("\n")

    return 0
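# The FASTA record type used above comes from paleomix.common.formats.fasta;
# a minimal stand-in matching how it is used here:
import collections


class FASTA(collections.namedtuple("FASTA", ("name", "meta", "sequence"))):
    def __str__(self):
        # Render a standard FASTA record; meta (if any) joins the header
        header = self.name if not self.meta else \
            "%s %s" % (self.name, self.meta)
        return ">%s\n%s" % (header, self.sequence)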