示例#1
0
def mapred_site_xml_defaults(workdir, node_info):
    '''
    Build the default property -> value mapping for mapred-site.xml.

    All memory figures are derived from the ``ram_per_container`` value
    returned by ``memory_defaults(node_info)``: map tasks are sized from one
    container's worth of RAM and reduce tasks from two.

    ``workdir`` is accepted but not used by this function.

    Returns a dict keyed by Hadoop property name.
    '''
    container_ram = memory_defaults(node_info).ram_per_container

    # JVM heap sizes (-Xmx) are 80% of the RAM the task size is based on.
    map_heap = format_memory(0.8 * container_ram, round_val=True)
    reduce_heap = format_memory(0.8 * 2 * container_ram, round_val=True)

    # Original author's note: in their tests Yarn complained when a job was
    # run with these values set to more than 8g.
    # NOTE(review): no explicit 8g cap is visible in this function.
    map_mb = round_mb(container_ram)
    reduce_mb = round_mb(2 * container_ram)

    # io.sort.mb can't be > 2047mb, so clamp 40% of the map memory to that.
    sort_mb = min(int(0.4 * map_mb), 2047)

    return {
        'mapreduce.framework.name': 'yarn',
        'mapreduce.map.java.opts': '-Xmx%s' % map_heap,
        'mapreduce.map.memory.mb': map_mb,
        'mapreduce.reduce.java.opts': '-Xmx%s' % reduce_heap,
        'mapreduce.reduce.memory.mb': reduce_mb,
        'mapreduce.task.io.sort.mb': sort_mb,
        'yarn.app.mapreduce.am.staging-dir': '$localworkdir/tmp/hadoop-yarn/staging',
    }
示例#2
0
def mapred_site_xml_defaults(workdir, node_info):
    '''
    Return the default settings for the mapred-site.xml config file.

    The result is a dict of Hadoop property names to values. Memory settings
    come from ``memory_defaults(node_info).ram_per_container``; reduce tasks
    are sized from twice the amount used for map tasks.

    The ``workdir`` argument is not referenced in the body.
    '''
    per_container = memory_defaults(node_info).ram_per_container

    # Heap limits passed to the task JVMs: 80% of the underlying RAM figure.
    heap_for_map = format_memory(0.8 * per_container, round_val=True)
    heap_for_reduce = format_memory(0.8 * 2 * per_container, round_val=True)

    # Original author's note: Yarn objected in testing when these values were
    # set to more than 8g.
    # NOTE(review): this function itself does not clamp them to 8g.
    mb_for_map = round_mb(per_container)
    mb_for_reduce = round_mb(2 * per_container)

    settings = {}
    settings['mapreduce.framework.name'] = 'yarn'
    settings['mapreduce.map.java.opts'] = '-Xmx%s' % heap_for_map
    settings['mapreduce.map.memory.mb'] = mb_for_map
    settings['mapreduce.reduce.java.opts'] = '-Xmx%s' % heap_for_reduce
    settings['mapreduce.reduce.memory.mb'] = mb_for_reduce
    # io.sort.mb can't be > 2047mb
    settings['mapreduce.task.io.sort.mb'] = min(int(0.4 * mb_for_map), 2047)
    settings['yarn.app.mapreduce.am.staging-dir'] = '$localworkdir/tmp/hadoop-yarn/staging'
    return settings
 def test_format_memory(self):
     # Checks hcc.format_memory against known byte counts, both with and
     # without round_val=True. Inputs written as strings are first turned
     # into a number by hcc.parse_memory.
     parse = hcc.parse_memory
     fmt = hcc.format_memory

     def check(amount, expected, **options):
         # Same comparison as a direct assertEqual(format_memory(...), expected).
         self.assertEqual(fmt(amount, **options), expected)

     # Plain byte counts.
     check(1, '1b')
     check(1024, '1k')
     check(2000, '2000b')
     check(1024*1024, '1m')
     check(1024*1024, '1m', round_val=True)

     # Values produced by parse_memory.
     check(parse('0.5t'), '512g')
     check(parse('0.5t'), '512g', round_val=True)
     check(parse('8g'), '8g')
     check(parse('9t'), '9t')

     # 7.5m is reported in k unless rounding is requested.
     check(parse('7.5m'), '7680k')
     check(parse('7.5m'), '8m', round_val=True)

     # e.g. from our high memory machines
     check(540950507520, '504g', round_val=True)
示例#4
0
 def test_format_memory(self):
     # Exercises hcc.format_memory on raw byte counts and on values obtained
     # from hcc.parse_memory, with and without round_val=True.
     to_bytes = hcc.parse_memory
     render = hcc.format_memory

     def expect(expected, amount, round_val=None):
         # round_val is only forwarded when the caller supplied it, so the
         # un-rounded cases call format_memory with a single argument.
         if round_val is None:
             actual = render(amount)
         else:
             actual = render(amount, round_val=round_val)
         self.assertEqual(actual, expected)

     expect('1b', 1)
     expect('1k', 1024)
     expect('2000b', 2000)
     expect('1m', 1024 * 1024)
     expect('1m', 1024 * 1024, round_val=True)
     expect('512g', to_bytes('0.5t'))
     expect('512g', to_bytes('0.5t'), round_val=True)
     expect('8g', to_bytes('8g'))
     expect('9t', to_bytes('9t'))
     expect('7680k', to_bytes('7.5m'))
     expect('8m', to_bytes('7.5m'), round_val=True)
     # e.g. from our high memory machines
     expect('504g', 540950507520, round_val=True)