# --- Example 1 ---
def configure_env(args):
    """Export Hadoop-related CLI options into the process environment.

    Reads ``args.hadoop_home`` and ``args.hadoop_conf_dir``; each value that
    is truthy is copied into the corresponding HADOOP_HOME / HADOOP_CONF_DIR
    environment variable.  If anything was exported, ``hdfs.reset()`` is
    called so the hdfs module picks up the new configuration.
    """
    updated = False
    for env_name in ("HADOOP_HOME", "HADOOP_CONF_DIR"):
        value = getattr(args, env_name.lower())
        if value:
            os.environ[env_name] = value
            updated = True
    if updated:
        hdfs.reset()
# --- Example 2 ---
def main(argv=sys.argv[1:]):
    """Open an HDFS connection, dump its status, then close it and dump again.

    NOTE(review): Python 2 code (print statements).  The default argument is
    evaluated once at import time, but sys.argv[1:] is a fresh copy, so the
    shared-mutable-default pitfall does not apply here.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument("--conf-dir", metavar="HADOOP_CONF_DIR")
    args = parser.parse_args(argv)
    if args.conf_dir:
        # Point the Hadoop client libs at the requested config directory and
        # force the hdfs module to re-read it.
        os.environ["HADOOP_CONF_DIR"] = os.path.abspath(args.conf_dir)
        hdfs.reset()
    fs = hdfs.hdfs()
    print "--- OPEN ---"
    dump_status(fs)
    print "cwd:", fs.working_directory()
    print
    fs.close()
    # Dump again after close to show what status info survives disconnection.
    print "--- CLOSED ---"
    dump_status(fs)
# --- Example 3 ---
def main(argv=sys.argv[1:]):
  """Open an HDFS connection, dump its status, then close it and dump again.

  NOTE(review): Python 2 code (print statements); 2-space-indented duplicate
  of the previous example.  The default argument is evaluated once at import
  time, but sys.argv[1:] is a fresh copy, so the mutable-default pitfall does
  not apply here.
  """
  parser = argparse.ArgumentParser(description=__doc__)
  parser.add_argument("--conf-dir", metavar="HADOOP_CONF_DIR")
  args = parser.parse_args(argv)
  if args.conf_dir:
    # Point the Hadoop client libs at the requested config directory and
    # force the hdfs module to re-read it.
    os.environ["HADOOP_CONF_DIR"] = os.path.abspath(args.conf_dir)
    hdfs.reset()
  fs = hdfs.hdfs()
  print "--- OPEN ---"
  dump_status(fs)
  print "cwd:", fs.working_directory()
  print
  fs.close()
  # Dump again after close to show what status info survives disconnection.
  print "--- CLOSED ---"
  dump_status(fs)
# --- Example 4 ---
def main(argv):
  """Drive a distributed BLAST run over Hadoop.

  Parses *argv*, configures logging and the Hadoop environment, then uploads
  the BLAST DB archive to HDFS, converts the input FASTA, runs the BLAST MR
  job, and collects its output.

  Parameters:
    argv: list of command-line arguments (positional: input FASTA path and
      BLAST DB archive path).

  Exits with status 2 (after printing help) when the two positional
  arguments are missing.
  """
  parser = make_parser()
  # BUG FIX: the original called parser.parse_args() with no arguments,
  # silently parsing sys.argv and ignoring the caller-supplied ``argv``.
  opt, args = parser.parse_args(argv)
  try:
    input_fasta = args[0]
    db_archive = args[1]
  except IndexError:
    parser.print_help()
    sys.exit(2)

  STR_GENERATOR.prefix = os.path.basename(input_fasta)

  # Replace any pre-installed handlers so basicConfig below takes effect
  # (basicConfig is a no-op when the root logger already has handlers).
  logger = logging.getLogger()
  for h in logger.handlers:
    logger.removeHandler(h)
  # Keep the textual level around (opt.log_level_str) before converting
  # opt.log_level to the numeric logging constant.
  opt.log_level_str = opt.log_level
  opt.log_level = getattr(logging, opt.log_level)
  kwargs = {'format': LOG_FORMAT,
            'datefmt': LOG_DATEFMT,
            'level': opt.log_level}
  if opt.log_file:
    kwargs['filename'] = opt.log_file
  logging.basicConfig(**kwargs)

  logger.debug("cli args: %r" % (args,))
  logger.debug("cli opts: %s" % opt)

  # mr_dump_file becomes an open file handle (or stderr); closed in finally.
  if opt.mr_dump_file:
    opt.mr_dump_file = open(opt.mr_dump_file, "w")
  else:
    opt.mr_dump_file = sys.stderr

  if not opt.blast_db:
    # Default DB name: archive basename up to the first dot.
    opt.blast_db = os.path.basename(db_archive).split(".", 1)[0]
    logger.info("--blast-db not provided: setting to %r" % opt.blast_db)

  # Export the Hadoop layout and reset hdfs so it re-reads the config.
  os.environ["HADOOP_HOME"] = opt.hadoop_home
  if not opt.hadoop:
    opt.hadoop = os.path.join(opt.hadoop_home, "bin/hadoop")
  if not opt.hadoop_conf_dir:
    opt.hadoop_conf_dir = os.path.join(opt.hadoop_home, "conf")
  os.environ["HADOOP_CONF_DIR"] = opt.hadoop_conf_dir
  hdfs.reset()

  fs = hdfs.hdfs()
  logger.debug("hdfs params: host=%s, port=%d" % (fs.host, fs.port))
  # hdfs.hdfs("", 0) connects to the local filesystem.  TODO confirm
  lfs = hdfs.hdfs("", 0)
  runner = Runner(fs, lfs, logger)

  try:
    db_archive_hdfs = runner.upload_archive(db_archive)
    blast_input_hdfs = runner.run_f2t(input_fasta, opt)
    blast_output_hdfs = runner.run_blast(blast_input_hdfs, db_archive_hdfs,
                                         opt)
    runner.collect_output(blast_output_hdfs, opt)
    logger.info("all done")
  finally:
    # Always release both filesystem handles and the dump file (but never
    # close sys.stderr itself).
    lfs.close()
    fs.close()
    if opt.mr_dump_file is not sys.stderr:
      opt.mr_dump_file.close()