Exemplo n.º 1
0
def main(argv):
    """Run the MRLite job scheduler for the given command-line arguments.

    Passes the path ``log-mrlite-scheduler.txt`` (in the current working
    directory) to ``config_logging``, parses ``argv`` with
    ``MRLiteOptionParser`` and drives an ``MRLiteJobScheduler`` through its
    start, monitor and quit steps, logging start/finish times and the
    elapsed wall-clock time.

    Args:
        argv: Argument list handed to ``MRLiteOptionParser.parse_args``.

    Raises:
        SystemExit: With status -1, after ``scheduler.kill_jobs()`` has been
            called, when the run is interrupted by CTRL-C or when any other
            ``Exception`` escapes the scheduler calls.
    """
    logfile = os.path.join(os.getcwd(), 'log-mrlite-scheduler.txt')
    config_logging(logfile)
    start_time = time.time()
    # Values are passed as logger arguments so the logging module performs
    # the formatting itself.
    logging.info('Job started at %s', time.asctime())
    parser = MRLiteOptionParser()
    # The second element of the parse result is not used by this function.
    options, _ = parser.parse_args(argv)
    scheduler = MRLiteJobScheduler(options)
    try:
        scheduler.start_jobs()
        scheduler.monitor_jobs()
        scheduler.quit_jobs()
        logging.info('Job finished at %s', time.asctime())
        logging.info('Job run for %.3f seconds', time.time() - start_time)
    except KeyboardInterrupt:
        logging.info('Interrupted by CTRL-C')
        scheduler.kill_jobs()
        sys.exit(-1)
    except Exception:
        # A failed run is an error, not routine progress: logging.exception
        # emits the message at ERROR level and appends the active traceback,
        # replacing the manual sys.exc_info()/format_exception plumbing.
        logging.exception('cache exception in mrlite.py')
        scheduler.kill_jobs()
        sys.exit(-1)
Exemplo n.º 2
0
def main(argv):
    """Entry point: parse options and run the scheduled jobs to completion.

    The log file path ``log-mrlite-scheduler.txt`` under the current working
    directory is handed to ``config_logging``. ``argv`` is parsed by
    ``MRLiteOptionParser`` and the resulting options configure an
    ``MRLiteJobScheduler``, whose ``start_jobs``, ``monitor_jobs`` and
    ``quit_jobs`` methods are called in that order.

    Args:
        argv: Argument list handed to ``MRLiteOptionParser.parse_args``.

    Raises:
        SystemExit: With status -1 once ``scheduler.kill_jobs()`` has run,
            either because of a CTRL-C interrupt or because another
            ``Exception`` was raised while the jobs were being driven.
    """
    logfile = os.path.join(os.getcwd(), 'log-mrlite-scheduler.txt')
    config_logging(logfile)
    start_time = time.time()
    # Logger arguments instead of eager %-formatting: the logging module
    # builds the final message.
    logging.info('Job started at %s', time.asctime())
    parser = MRLiteOptionParser()
    # Only the options object is needed; the other parse result is unused.
    options, _ = parser.parse_args(argv)
    scheduler = MRLiteJobScheduler(options)
    try:
        scheduler.start_jobs()
        scheduler.monitor_jobs()
        scheduler.quit_jobs()
        logging.info('Job finished at %s', time.asctime())
        logging.info('Job run for %.3f seconds', time.time() - start_time)
    except KeyboardInterrupt:
        logging.info('Interrupted by CTRL-C')
        scheduler.kill_jobs()
        sys.exit(-1)
    except Exception:
        # Report the failure at ERROR level with its traceback attached;
        # logging.exception captures the exception currently being handled.
        logging.exception('cache exception in mrlite.py')
        scheduler.kill_jobs()
        sys.exit(-1)
Exemplo n.º 3
0
 def test_map_only_mode(self):
     """Map-only options yield one expected map task per listed machine.

     The ``{m1, m2}`` group is expected to produce two tasks that differ
     only in ``machine``; ``{m3}`` is expected to produce a third task with
     its own tmp dir and log filebase. The option strings contain stray
     whitespace and a trailing slash that the expected values do not carry.
     """
     # The backslash continuations sit inside the string literal, so the
     # leading spaces of the continued lines are part of the option value.
     argv = [
         "--mapreduce_cmd=/bin/echo",
         "--mapreduce_maponly_map_io=\
         {m1, m2}:WordCountMapper:recordio:/input-000??:text:/disk10/output;\
         {m3}:BigramCountMapper:text: /text-*:recordio:/disk7/output",
         "--mapreduce_log_filebase={m1,m2}/disk1/log; {m3}/disk3/tmp/log",
         "--mapreduce_tmp_dir={m1,m2}/disk1/tmp/;{m3}/tmp;",
     ]

     word_count_on_m1 = {
         'machine': 'm1',
         'class': 'WordCountMapper',
         'input_format': 'recordio',
         'input_path': '/input-000??',
         'output_format': 'text',
         'output_path': '/disk10/output',
         'tmp_dir': '/disk1/tmp',
         'log_filebase': '/disk1/log',
     }
     # m2 shares every setting with m1 except the machine name.
     word_count_on_m2 = dict(word_count_on_m1)
     word_count_on_m2['machine'] = 'm2'
     bigram_on_m3 = {
         'machine': 'm3',
         'class': 'BigramCountMapper',
         'input_format': 'text',
         'input_path': '/text-*',
         'output_format': 'recordio',
         'output_path': '/disk7/output',
         'tmp_dir': '/tmp',
         'log_filebase': '/disk3/tmp/log',
     }
     expected_map_tasks = [word_count_on_m1, word_count_on_m2, bigram_on_m3]

     options, _ = MRLiteOptionParser(debug=True).parse_args(argv)
     self.assertEqual(expected_map_tasks, options.map_tasks)
    def test_map_only_mode(self):
        """Check the map tasks parsed from a map-only option set.

        Expects ``options.map_tasks`` to hold a WordCountMapper task for
        each of m1 and m2, followed by a BigramCountMapper task for m3,
        each carrying the tmp dir and log filebase given for its machine.
        """
        # Continuation lines inside the literal keep their leading spaces as
        # part of the option value, so their indentation must not change.
        argv = [
            "--mapreduce_cmd=/bin/echo",
            "--mapreduce_maponly_map_io=\
            {m1, m2}:WordCountMapper:recordio:/input-000??:text:/disk10/output;\
            {m3}:BigramCountMapper:text: /text-*:recordio:/disk7/output",
            "--mapreduce_log_filebase={m1,m2}/disk1/log; {m3}/disk3/tmp/log",
            "--mapreduce_tmp_dir={m1,m2}/disk1/tmp/;{m3}/tmp;",
        ]
        option_parser = MRLiteOptionParser(debug=True)
        options, _ = option_parser.parse_args(argv)

        # m1 and m2 are configured as one group, so only 'machine' differs.
        word_count_tasks = [
            {
                'machine': host,
                'class': 'WordCountMapper',
                'input_format': 'recordio',
                'input_path': '/input-000??',
                'output_format': 'text',
                'output_path': '/disk10/output',
                'tmp_dir': '/disk1/tmp',
                'log_filebase': '/disk1/log',
            }
            for host in ('m1', 'm2')
        ]
        bigram_task = {
            'machine': 'm3',
            'class': 'BigramCountMapper',
            'input_format': 'text',
            'input_path': '/text-*',
            'output_format': 'recordio',
            'output_path': '/disk7/output',
            'tmp_dir': '/tmp',
            'log_filebase': '/disk3/tmp/log',
        }
        expected_map_tasks = word_count_tasks + [bigram_task]
        self.assertEqual(expected_map_tasks, options.map_tasks)
Exemplo n.º 5
0
    def test_normal_case(self):
        """Check parsing of a full map/reduce option set.

        Verifies the machine set, the per-machine map and reduce tasks, the
        tmp-dir and log-filebase mappings, and the split of
        ``--mapreduce_cmd`` into executable and arguments. The option
        strings contain stray spaces around separators and braces that the
        expected values do not carry.
        """
        # Backslash continuations are inside the literals: the leading spaces
        # of the continued lines belong to the option values.
        argv = [
            "--mapreduce_cmd=/bin/echo --args1 --args2",
            "--mapreduce_map_io=\
            {m1, m2}:WordCountMapper:recordio : /input-000??-of-00005:/tmp/output;\
            {m3}:BigramCountMapper:text:/text-* :/tmp/output",
            "--mapreduce_reduce_io=\
            {m1, m2}:WordCountReducer:/disk10/reduce-input: text:/disk10/output;\
            {m2,m3}:IdentityReducer: /disk7/reduce-input:recordio:/disk7/output",
            "--mapreduce_log_filebase={m1, m2}/disk1/log;{ m3}/disk3/tmp/log",
            "--mapreduce_tmp_dir={m1,m2}/disk1/tmp/;{m3 }/tmp",
            "--mapreduce_incremental_mode",
            "--mapreduce_ssh_port=36000",
        ]

        expected_tmp_dir = {
            'm1': '/disk1/tmp',
            'm2': '/disk1/tmp',
            'm3': '/tmp',
        }
        expected_log_filebase = {
            'm1': '/disk1/log',
            'm2': '/disk1/log',
            'm3': '/disk3/tmp/log',
        }

        def task(machine, cls, in_format, in_path, out_format, out_path):
            """Build one expected task dict for ``machine``.

            The tmp dir and log filebase are taken from the expected
            per-machine mappings above.
            """
            return {
                'machine': machine,
                'class': cls,
                'input_format': in_format,
                'input_path': in_path,
                'output_format': out_format,
                'output_path': out_path,
                'tmp_dir': expected_tmp_dir[machine],
                'log_filebase': expected_log_filebase[machine],
            }

        expected_machines = {'m1', 'm2', 'm3'}
        # Map tasks are expected to carry no output format.
        expected_map_tasks = [
            task('m1', 'WordCountMapper', 'recordio',
                 '/input-000??-of-00005', None, '/tmp/output'),
            task('m2', 'WordCountMapper', 'recordio',
                 '/input-000??-of-00005', None, '/tmp/output'),
            task('m3', 'BigramCountMapper', 'text',
                 '/text-*', None, '/tmp/output'),
        ]
        # Reduce tasks are expected to carry no input format.
        expected_reduce_tasks = [
            task('m1', 'WordCountReducer', None,
                 '/disk10/reduce-input', 'text', '/disk10/output'),
            task('m2', 'WordCountReducer', None,
                 '/disk10/reduce-input', 'text', '/disk10/output'),
            task('m2', 'IdentityReducer', None,
                 '/disk7/reduce-input', 'recordio', '/disk7/output'),
            task('m3', 'IdentityReducer', None,
                 '/disk7/reduce-input', 'recordio', '/disk7/output'),
        ]

        options, _ = MRLiteOptionParser(debug=True).parse_args(argv)
        self.assertEqual(expected_machines, options.machines)
        self.assertEqual(expected_map_tasks, options.map_tasks)
        self.assertEqual(expected_reduce_tasks, options.reduce_tasks)
        self.assertEqual(expected_tmp_dir, options.mapreduce_tmp_dir)
        self.assertEqual(expected_log_filebase, options.mapreduce_log_filebase)
        self.assertEqual('/bin/echo', options.local_executable)
        self.assertEqual('--args1 --args2', options.cmd_args)
    def test_normal_case(self):
        """Parse a complete map/reduce command line and compare every result.

        Expected outcome: machines m1-m3; WordCountMapper tasks on m1 and m2
        plus a BigramCountMapper task on m3; WordCountReducer tasks on m1 and
        m2 followed by IdentityReducer tasks on m2 and m3; per-machine tmp
        dirs and log filebases; and ``--mapreduce_cmd`` split into
        ``/bin/echo`` and ``--args1 --args2``.
        """
        # The continued lines are part of the string literals, so their
        # leading spaces end up in the option values and must stay as they are.
        argv = [
            "--mapreduce_cmd=/bin/echo --args1 --args2",
            "--mapreduce_map_io=\
            {m1, m2}:WordCountMapper:recordio : /input-000??-of-00005:/tmp/output;\
            {m3}:BigramCountMapper:text:/text-* :/tmp/output",
            "--mapreduce_reduce_io=\
            {m1, m2}:WordCountReducer:/disk10/reduce-input: text:/disk10/output;\
            {m2,m3}:IdentityReducer: /disk7/reduce-input:recordio:/disk7/output",
            "--mapreduce_log_filebase={m1, m2}/disk1/log;{ m3}/disk3/tmp/log",
            "--mapreduce_tmp_dir={m1,m2}/disk1/tmp/;{m3 }/tmp",
            "--mapreduce_incremental_mode",
            "--mapreduce_ssh_port=36000",
        ]
        option_parser = MRLiteOptionParser(debug=True)
        options, _ = option_parser.parse_args(argv)

        expected_machines = set(('m1', 'm2', 'm3'))
        expected_tmp_dir = {
            'm1': '/disk1/tmp',
            'm2': '/disk1/tmp',
            'm3': '/tmp',
        }
        expected_log_filebase = {
            'm1': '/disk1/log',
            'm2': '/disk1/log',
            'm3': '/disk3/tmp/log',
        }

        # m1 and m2 form one mapper group: identical apart from 'machine'.
        word_count_mappers = [
            {
                'machine': host,
                'class': 'WordCountMapper',
                'input_format': 'recordio',
                'input_path': '/input-000??-of-00005',
                'output_format': None,
                'output_path': '/tmp/output',
                'tmp_dir': '/disk1/tmp',
                'log_filebase': '/disk1/log',
            }
            for host in ('m1', 'm2')
        ]
        bigram_mapper = {
            'machine': 'm3',
            'class': 'BigramCountMapper',
            'input_format': 'text',
            'input_path': '/text-*',
            'output_format': None,
            'output_path': '/tmp/output',
            'tmp_dir': '/tmp',
            'log_filebase': '/disk3/tmp/log',
        }
        expected_map_tasks = word_count_mappers + [bigram_mapper]

        word_count_reducers = [
            {
                'machine': host,
                'class': 'WordCountReducer',
                'input_format': None,
                'input_path': '/disk10/reduce-input',
                'output_format': 'text',
                'output_path': '/disk10/output',
                'tmp_dir': '/disk1/tmp',
                'log_filebase': '/disk1/log',
            }
            for host in ('m1', 'm2')
        ]
        # The identity reducers run on machines with different tmp/log
        # settings, so those two values are looked up per machine.
        identity_reducers = [
            {
                'machine': host,
                'class': 'IdentityReducer',
                'input_format': None,
                'input_path': '/disk7/reduce-input',
                'output_format': 'recordio',
                'output_path': '/disk7/output',
                'tmp_dir': expected_tmp_dir[host],
                'log_filebase': expected_log_filebase[host],
            }
            for host in ('m2', 'm3')
        ]
        expected_reduce_tasks = word_count_reducers + identity_reducers

        self.assertEqual(expected_machines, options.machines)
        self.assertEqual(expected_map_tasks, options.map_tasks)
        self.assertEqual(expected_reduce_tasks, options.reduce_tasks)
        self.assertEqual(expected_tmp_dir, options.mapreduce_tmp_dir)
        self.assertEqual(expected_log_filebase, options.mapreduce_log_filebase)
        self.assertEqual('/bin/echo', options.local_executable)
        self.assertEqual('--args1 --args2', options.cmd_args)