def configure_env(args):
    """Export Hadoop locations found on *args* and reset ``hdfs`` if needed.

    For each of ``HADOOP_HOME`` and ``HADOOP_CONF_DIR`` the lower-cased
    attribute of the same name (``args.hadoop_home``,
    ``args.hadoop_conf_dir``) is read.  Every truthy value is copied into
    ``os.environ`` under the upper-case name.  ``hdfs.reset()`` is called
    once, after both variables have been examined, and only when at least
    one of them was exported.

    Raises:
        AttributeError: if *args* lacks one of the two attributes.
    """
    env_changed = False
    for var_name in ("HADOOP_HOME", "HADOOP_CONF_DIR"):
        value = getattr(args, var_name.lower())
        if not value:
            # Empty / None: leave whatever the environment already holds.
            continue
        os.environ[var_name] = value
        env_changed = True
    if not env_changed:
        return
    hdfs.reset()
def main(argv=sys.argv[1:]):
    """Open an HDFS handle, report its status, close it and report again.

    Args:
        argv: command-line arguments to parse.  The default is bound to
            ``sys.argv[1:]`` when this function is defined.

    The only recognised option is ``--conf-dir``.  When given, its absolute
    path is exported as ``HADOOP_CONF_DIR`` and ``hdfs.reset()`` is called
    before the handle is created.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument("--conf-dir", metavar="HADOOP_CONF_DIR")
    args = parser.parse_args(argv)
    if args.conf_dir:
        os.environ["HADOOP_CONF_DIR"] = os.path.abspath(args.conf_dir)
        hdfs.reset()
    fs = hdfs.hdfs()
    # Single-argument parenthesised prints emit the same text under the
    # Python 2 print statement and the print function.
    try:
        print("--- OPEN ---")
        dump_status(fs)
        print("cwd: %s" % (fs.working_directory(),))
        print("")
    finally:
        # Release the handle even if one of the status calls above fails.
        fs.close()
    print("--- CLOSED ---")
    # Called on the closed handle on purpose, to show its state after close().
    dump_status(fs)
def main(argv):
    """Drive the whole run: parse options, set up logging and HDFS, run jobs.

    Two positional command-line arguments are required: the input FASTA
    path and the database archive path.  If either is missing, the parser
    help is printed and the process exits with status 2.

    Steps, in order:

    1. Configure the root logger from ``opt.log_level`` / ``opt.log_file``.
    2. Turn ``opt.mr_dump_file`` into an open file object (``sys.stderr``
       when no path was given).
    3. Fill in defaults for ``opt.blast_db``, ``opt.hadoop`` and
       ``opt.hadoop_conf_dir``; export ``HADOOP_HOME`` and
       ``HADOOP_CONF_DIR``; call ``hdfs.reset()``.
    4. Open the default HDFS handle and a second handle created with
       ``hdfs.hdfs("", 0)``, then call the ``Runner`` methods
       ``upload_archive``, ``run_f2t``, ``run_blast`` and ``collect_output``.
    5. Close both handles and the dump file, whether or not the run succeeded.

    Args:
        argv: currently unused -- see the review note on ``parse_args``.
    """
    parser = make_parser()
    # NOTE(review): ``argv`` is not forwarded here.  Presumably this is an
    # optparse parser, which then reads sys.argv[1:] -- confirm against
    # make_parser() and the caller before changing.
    opt, args = parser.parse_args()
    try:
        input_fasta = args[0]
        db_archive = args[1]
    except IndexError:
        parser.print_help()
        sys.exit(2)
    STR_GENERATOR.prefix = os.path.basename(input_fasta)

    logger = logging.getLogger()
    # Iterate over a copy: removeHandler() mutates logger.handlers, and
    # looping over the live list skips every other handler.  Any handler
    # left behind would turn the basicConfig() call below into a no-op.
    for h in list(logger.handlers):
        logger.removeHandler(h)
    # Keep the level name around; opt.log_level becomes the numeric level.
    opt.log_level_str = opt.log_level
    opt.log_level = getattr(logging, opt.log_level)
    kwargs = {
        'format': LOG_FORMAT,
        'datefmt': LOG_DATEFMT,
        'level': opt.log_level,
    }
    if opt.log_file:
        kwargs['filename'] = opt.log_file
    logging.basicConfig(**kwargs)
    logger.debug("cli args: %r" % (args,))
    logger.debug("cli opts: %s" % opt)

    if opt.mr_dump_file:
        opt.mr_dump_file = open(opt.mr_dump_file, "w")
    else:
        opt.mr_dump_file = sys.stderr
    try:
        if not opt.blast_db:
            # Default to the archive base name up to its first dot.
            opt.blast_db = os.path.basename(db_archive).split(".", 1)[0]
            logger.info(
                "--blast-db not provided: setting to %r" % opt.blast_db
            )

        os.environ["HADOOP_HOME"] = opt.hadoop_home
        if not opt.hadoop:
            opt.hadoop = os.path.join(opt.hadoop_home, "bin/hadoop")
        if not opt.hadoop_conf_dir:
            opt.hadoop_conf_dir = os.path.join(opt.hadoop_home, "conf")
        os.environ["HADOOP_CONF_DIR"] = opt.hadoop_conf_dir
        hdfs.reset()

        # Nested try/finally blocks keep the original close order
        # (lfs, fs, dump file) and ensure one failing close() or a failing
        # constructor cannot prevent the remaining resources being released.
        fs = hdfs.hdfs()
        try:
            logger.debug(
                "hdfs params: host=%s, port=%d" % (fs.host, fs.port)
            )
            lfs = hdfs.hdfs("", 0)
            try:
                runner = Runner(fs, lfs, logger)
                db_archive_hdfs = runner.upload_archive(db_archive)
                blast_input_hdfs = runner.run_f2t(input_fasta, opt)
                blast_output_hdfs = runner.run_blast(
                    blast_input_hdfs, db_archive_hdfs, opt
                )
                runner.collect_output(blast_output_hdfs, opt)
                logger.info("all done")
            finally:
                lfs.close()
        finally:
            fs.close()
    finally:
        # Never close the process-wide stderr stream.
        if opt.mr_dump_file is not sys.stderr:
            opt.mr_dump_file.close()