def __init__(self, ctx):
    """
    Set up the mapper from the task context ``ctx``.

    Reads the job configuration, obtains the "BLAST_HITS" counter,
    configures the "mapper" and "blastall" loggers, creates the
    blastall engine and assembles the option dictionary kept in
    ``self.opts``.

    Raises ValueError if the first comma-separated entry of
    "mapred.cache.archives" has no component after a "#".
    """
    super(Mapper, self).__init__(ctx)
    self.ctx = ctx
    jc = self.ctx.getJobConf()
    # NOTE(review): presumably this populates log_level, blastall_exe,
    # program, db_name, evalue, gap_cost, word_size and filter, which
    # are read below -- confirm against __get_configuration.
    self.__get_configuration(jc)
    self.hit_counter = self.ctx.getCounter(self.COUNTER_CLASS, "BLAST_HITS")

    mapper_log = logging.getLogger("mapper")
    self.logger = mapper_log
    mapper_log.setLevel(self.log_level)

    # Fixed names for the files exchanged with the engine.
    self.input_file = "temp.in"
    self.output_file = "temp.out"

    blast_log = logging.getLogger("blastall")
    blast_log.setLevel(self.log_level)
    self.engine = Engine(exe_file=self.blastall_exe, logger=blast_log)

    # The database directory is whatever follows "#" in the first
    # archive entry.
    try:
        archive_list = jc.get("mapred.cache.archives")
        first_archive = archive_list.split(",")[0]
        self.db_dir = first_archive.split("#")[1]
    except IndexError:
        raise ValueError('bad format for "mapred.cache.archives"')

    options = {}
    options["blastall.program"] = self.program
    options["blastall.database"] = os.path.join(self.db_dir, self.db_name)
    options["blastall.out.tabular"] = True
    options["blastall.input.file"] = self.input_file
    options["blastall.output.file"] = self.output_file
    options["blastall.evalue"] = self.evalue
    options["blastall.gap.cost"] = self.gap_cost
    options["blastall.word.size"] = self.word_size
    options["blastall.filter"] = self.filter
    self.opts = options
def main(argv):
    """
    Command-line entry point: run blastall on one input file.

    ``argv`` is parsed with the parser returned by ``make_parser()``.
    The positional arguments at index 1 and 2 are taken as the input
    and output file names (index 0 is presumably the program name --
    verify against the caller). If either is missing, the help text is
    printed and the process exits with status 2.

    When the corresponding options are set, ``get_db()`` and
    ``format_db()`` are called first; a non-zero return value from
    ``format_db()`` is reported as a warning and does not stop the run.

    Side effect: the module-level ``OPTS`` dictionary is updated in
    place with the input and output file names.
    """
    logger = logging.getLogger("main")
    logger.setLevel(logging.DEBUG)
    parser = make_parser()
    opt, args = parser.parse_args(argv)
    try:
        input_fn = args[1]
        output_fn = args[2]
    except IndexError:
        parser.print_help()
        sys.exit(2)
    if opt.get_db:
        get_db()
    if opt.format_db:
        retcode = format_db()
        if retcode:
            # logging.warn is a deprecated alias (removed in Python 3.13);
            # the root logger is kept so handler behaviour is unchanged.
            logging.warning("formatdb returned %d", retcode)
    OPTS["blastall.input.file"] = input_fn
    OPTS["blastall.output.file"] = output_fn
    engine = Engine(logger=logger)
    engine.blastall(opts=OPTS)