Exemplo n.º 1
0
  def testCpuMultiple(self):
    """Splits the only physical CPU in two and places ops on each half.

    Verifies that ops can be evaluated on `/device:CPU:0` and
    `/device:CPU:1`, that `/device:CPU:2` is expected to raise a
    `RuntimeError` matching 'unknown device', and that every logical device
    name returned by `config.list_logical_devices` accepts ops.
    """
    physical_cpus = config.list_physical_devices('CPU')
    self.assertEqual(len(physical_cpus), 1)

    virtual_cpus = [context.VirtualDeviceConfiguration() for _ in range(2)]
    config.set_virtual_device_configuration(physical_cpus[0], virtual_cpus)

    context.ensure_initialized()

    logical_cpus = config.list_logical_devices('CPU')
    self.assertEqual(len(logical_cpus), 2)

    # Both virtual CPUs must accept ops when addressed by index.
    for device_name in ('/device:CPU:0', '/device:CPU:1'):
      with ops.device(device_name):
        self.evaluate(constant_op.constant(1.0))

    # Only two logical CPUs were configured, so index 2 must be rejected.
    with self.assertRaisesRegexp(RuntimeError, 'unknown device'):
      with ops.device('/device:CPU:2'):
        self.evaluate(constant_op.constant(1.0))

    # Ensure we can place ops on each of the device names
    for logical_cpu in logical_cpus:
      with ops.device(logical_cpu.name):
        self.evaluate(constant_op.constant(1.0))
Exemplo n.º 2
0
  def testGpuInvalidConfig(self):
    """Checks how memory growth and virtual GPU configuration interact.

    Requires at least one physical GPU (asserted up front). The last
    physical GPU is the one that gets a virtual device configuration.
    """
    physical_gpus = config.list_physical_devices('GPU')
    self.assertNotEqual(len(physical_gpus), 0)
    last_gpu = physical_gpus[-1]

    for physical_gpu in physical_gpus:
      config.set_memory_growth(physical_gpu, True)

    self.assertTrue(context.context().config.gpu_options.allow_growth)

    # Virtual GPUs created without a memory limit are expected to be rejected.
    with self.assertRaisesRegexp(ValueError, 'memory limit'):
      config.set_virtual_device_configuration(
          last_gpu,
          [context.VirtualDeviceConfiguration() for _ in range(2)])

    # The rejected configuration must not have been stored.
    self.assertIsNone(config.get_virtual_device_configuration(last_gpu))
    config.set_virtual_device_configuration(
        last_gpu,
        [context.VirtualDeviceConfiguration(memory_limit=10) for _ in range(2)])

    self.assertFalse(context.context().config.gpu_options.allow_growth)

    # Once virtual devices exist, changing memory growth is expected to fail.
    with self.assertRaisesRegexp(ValueError, 'virtual devices'):
      config.set_memory_growth(last_gpu, False)
Exemplo n.º 3
0
def _mimic_two_cpus():
    """Configure the first physical CPU as two virtual CPU devices."""
    first_cpu = config.list_physical_devices("CPU")[0]
    virtual_cpus = [context.VirtualDeviceConfiguration() for _ in range(2)]
    config.set_virtual_device_configuration(first_cpu, virtual_cpus)
Exemplo n.º 4
0
  def testGpuInvalidConfig(self):
    """Exercises invalid combinations of memory growth and virtual GPUs.

    At least one physical GPU must be present (asserted up front); the
    virtual device configuration is applied to the last one.
    """
    all_gpus = config.list_physical_devices('GPU')
    self.assertNotEqual(len(all_gpus), 0)
    target_gpu = all_gpus[-1]

    for each_gpu in all_gpus:
      config.set_memory_growth(each_gpu, True)

    growth_config = context.context().config
    self.assertTrue(growth_config.gpu_options.allow_growth)

    # Splitting a GPU without memory limits is expected to raise.
    with self.assertRaisesRegexp(ValueError, 'memory limit'):
      unlimited = [context.VirtualDeviceConfiguration() for _ in range(2)]
      config.set_virtual_device_configuration(target_gpu, unlimited)

    # Nothing may have been recorded by the failed call above.
    self.assertIsNone(config.get_virtual_device_configuration(target_gpu))
    limited = [
        context.VirtualDeviceConfiguration(memory_limit=10) for _ in range(2)
    ]
    config.set_virtual_device_configuration(target_gpu, limited)

    virtual_config = context.context().config
    self.assertFalse(virtual_config.gpu_options.allow_growth)

    # Memory growth cannot be changed on a GPU that has virtual devices.
    with self.assertRaisesRegexp(ValueError, 'virtual devices'):
      config.set_memory_growth(target_gpu, False)
Exemplo n.º 5
0
def configure_virtual_cpus():
    """Split the first physical CPU into 2 virtual CPU devices."""
    physical_cpus = config.list_physical_devices('CPU')
    primary_cpu = physical_cpus[0]
    # Set 2 virtual CPUs
    two_virtual_cpus = [context.VirtualDeviceConfiguration() for _ in range(2)]
    config.set_virtual_device_configuration(primary_cpu, two_virtual_cpus)
Exemplo n.º 6
0
  def testCpuMultiple(self):
    """Configures two virtual CPUs on one physical CPU and checks placement.

    Ops must evaluate on `/device:CPU:0` and `/device:CPU:1`; placing an op
    on `/device:CPU:2` is expected to raise a `RuntimeError` matching
    'unknown device'. Finally every logical device is addressed by its
    reported name.
    """
    host_cpus = config.list_physical_devices('CPU')
    self.assertEqual(len(host_cpus), 1)

    config.set_virtual_device_configuration(
        host_cpus[0],
        [context.VirtualDeviceConfiguration() for _ in range(2)])

    context.ensure_initialized()

    virtual_cpus = config.list_logical_devices('CPU')
    self.assertEqual(len(virtual_cpus), 2)

    # Address the two configured devices by explicit index.
    for cpu_index in (0, 1):
      with ops.device('/device:CPU:' + str(cpu_index)):
        value = constant_op.constant(1.0)
        self.evaluate(value)

    # A third CPU was never configured, so this placement must fail.
    with self.assertRaisesRegexp(RuntimeError, 'unknown device'):
      with ops.device('/device:CPU:2'):
        value = constant_op.constant(1.0)
        self.evaluate(value)

    # Ensure we can place ops on each of the device names
    for virtual_cpu in virtual_cpus:
      with ops.device(virtual_cpu.name):
        value = constant_op.constant(1.0)
        self.evaluate(value)
Exemplo n.º 7
0
 def worker_fn():
   """Optionally carve out a small virtual GPU, then run 100 worker steps."""
   physical_gpus = config.list_physical_devices('GPU')
   if physical_gpus:
     # Set virtual GPU with memory limit of 64MB so that multiple worker
     # processes can share the physical GPU
     shared_slice = context.VirtualDeviceConfiguration(64)
     config.set_virtual_device_configuration(physical_gpus[0], [shared_slice])
   steps_remaining = 100
   while steps_remaining > 0:
     worker_step_fn()
     steps_remaining -= 1
Exemplo n.º 8
0
 def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
   """Initializes the test case; splits a lone GPU into two virtual GPUs.

   Args:
     methodName: Name of the test method to run, forwarded to the base class.
   """
   super(TestMultiGPUModel, self).__init__(methodName)
   gpus = config.list_physical_devices('GPU')
   if len(gpus) != 1:
     return
   # A GPU is available, simulate 2 instead.
   simulated_gpus = [context.VirtualDeviceConfiguration(500) for _ in range(2)]
   config.set_virtual_device_configuration(gpus[0], simulated_gpus)
 def _ensure_context_initialized(self):
   """Splits the first GPU into two virtual GPUs and initializes the context.

   Skips the current test when no physical GPU is found.
   """
   physical_gpus = config.list_physical_devices('GPU')
   gpu_count = len(physical_gpus)
   if gpu_count < 1:
     self.skipTest(
         'Expected at least 1 GPU but found {} GPUs'.format(gpu_count))
   virtual_gpus = [context.VirtualDeviceConfiguration(1024) for _ in range(2)]
   config.set_virtual_device_configuration(physical_gpus[0], virtual_gpus)
   context.ensure_initialized()
Exemplo n.º 10
0
 def setUp(self):
      """Runs the base-class setup, then configures 4 virtual CPUs."""
      super(FunctionGradientsTest, self).setUp()
      physical_cpus = config.list_physical_devices('CPU')
      # Set 4 virtual CPUs
      four_virtual_cpus = [
          context.VirtualDeviceConfiguration() for _ in range(4)
      ]
      config.set_virtual_device_configuration(physical_cpus[0],
                                              four_virtual_cpus)
 def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
      """Initializes the test case; simulates two GPUs when only one exists.

      The split is skipped when any 'XLA_GPU' physical device is listed.

      Args:
        methodName: Name of the test method to run, forwarded to the base
          class.
      """
      super(TestMultiGPUModel, self).__init__(methodName)
      physical_gpus = config.list_physical_devices('GPU')
      physical_xla_gpus = config.list_physical_devices('XLA_GPU')
      # NOTE: XLA devices don't support the set_virtual_device_configuration
      # codepaths.
      has_single_plain_gpu = len(physical_gpus) == 1 and not physical_xla_gpus
      if has_single_plain_gpu:
          # A GPU is available, simulate 2 instead.
          simulated_gpus = [
              context.VirtualDeviceConfiguration(500) for _ in range(2)
          ]
          config.set_virtual_device_configuration(physical_gpus[0],
                                                  simulated_gpus)
Exemplo n.º 12
0
  def testGpuInvalidConfig(self):
    """Checks invalid GPU memory-growth and virtual-device combinations.

    Requires at least one physical GPU (asserted up front). With more than
    one GPU it first checks that differing memory-growth settings across
    visible GPUs are expected to raise 'cannot differ'. It then checks how
    memory growth and virtual device configuration interact on the last GPU.
    """
    physical_gpus = config.list_physical_devices('GPU')
    self.assertNotEqual(len(physical_gpus), 0)

    if len(physical_gpus) > 1:
      first_gpu, second_gpu = physical_gpus[0], physical_gpus[1]

      # Assert if other GPUs were not configured
      config.set_memory_growth(first_gpu, True)
      with self.assertRaisesRegexp(ValueError, 'cannot differ'):
        _ = context.context().config

      # If we limit visibility to GPU 0, growth is fine
      config.set_visible_devices(first_gpu, 'GPU')
      self.assertTrue(context.context().config.gpu_options.allow_growth)

      # Default setting for second GPU is False and works if we set visibility
      config.set_visible_devices(second_gpu, 'GPU')
      self.assertFalse(context.context().config.gpu_options.allow_growth)

      # Growth now fails because all the GPUs are visible and not the same
      config.set_visible_devices(physical_gpus, 'GPU')
      with self.assertRaisesRegexp(ValueError, 'cannot differ'):
        _ = context.context().config

    last_gpu = physical_gpus[-1]

    for physical_gpu in physical_gpus:
      config.set_memory_growth(physical_gpu, True)

    self.assertTrue(context.context().config.gpu_options.allow_growth)

    # Virtual GPUs created without a memory limit are expected to be rejected.
    with self.assertRaisesRegexp(ValueError, 'memory limit'):
      config.set_virtual_device_configuration(
          last_gpu,
          [context.VirtualDeviceConfiguration() for _ in range(2)])

    # The rejected configuration must not have been stored.
    self.assertIsNone(config.get_virtual_device_configuration(last_gpu))
    config.set_virtual_device_configuration(
        last_gpu,
        [context.VirtualDeviceConfiguration(memory_limit=10) for _ in range(2)])

    self.assertFalse(context.context().config.gpu_options.allow_growth)

    # Once virtual devices exist, changing memory growth is expected to fail.
    with self.assertRaisesRegexp(ValueError, 'virtual devices'):
      config.set_memory_growth(last_gpu, False)
Exemplo n.º 13
0
    def testCollectiveReduceMinMax(self):
        """Runs 'Max' and 'Min' all-reduce over two virtual GPUs using NCCL.

        Skipped unless exactly one physical GPU is found. That GPU is split
        into two virtual devices, each created with
        `VirtualDeviceConfiguration(1024)`.
        """
        physical_gpus = config.list_physical_devices('GPU')
        gpu_count = len(physical_gpus)
        if gpu_count != 1:
            self.skipTest('Expected 1 GPU but found {} GPUs'.format(gpu_count))
        config.set_virtual_device_configuration(
            physical_gpus[0],
            [context.VirtualDeviceConfiguration(1024) for _ in range(2)])
        context.ensure_initialized()

        @def_function.function
        def run_all_reduce(group_key, instance_key, merge_op):
            os.environ['NCCL_DEBUG'] = 'INFO'
            os.environ['NCCL_LAUNCH_MODE'] = 'PARALLEL'
            # Arguments shared by both participants of the collective.
            reduce_kwargs = dict(group_size=2,
                                 group_key=group_key,
                                 instance_key=instance_key,
                                 merge_op=merge_op,
                                 final_op='Id',
                                 communication_hint='nccl')
            with ops.device('/GPU:0'):
                first_input = constant_op.constant([1., 20., 3., 40., 5.])
                first_output = collective_ops.all_reduce(
                    first_input, **reduce_kwargs)
            with ops.device('/GPU:1'):
                second_input = constant_op.constant([10., 2., 30., 4., 50.])
                second_output = collective_ops.all_reduce(
                    second_input, **reduce_kwargs)
            return first_output, second_output

        # Each case pairs a merge op with the element-wise result it should
        # produce on every participant.
        cases = (('Max', [10., 20., 30., 40., 50.]),
                 ('Min', [1., 2., 3., 4., 5.]))
        for merge_op, expected in cases:
            reduced = run_all_reduce(group_key=10,
                                     instance_key=20,
                                     merge_op=merge_op)
            for tensor in reduced:
                self.assertAllClose(tensor, expected, rtol=1e-5, atol=1e-5)
def SetVirtualCpus(num_virtual_cpus):
  """Create virtual CPU devices if they haven't yet been created.

  When the first physical CPU has no virtual device configuration yet, it is
  split into `num_virtual_cpus` virtual devices. When it is already
  configured, the existing configuration is left untouched as long as it has
  at least `num_virtual_cpus` entries.

  Args:
    num_virtual_cpus: Number of virtual CPUs required; must be at least 1.

  Raises:
    ValueError: If `num_virtual_cpus` is less than 1.
    RuntimeError: If no physical CPU is found, or the CPU is already
      configured with fewer virtual devices than requested.
  """
  if num_virtual_cpus < 1:
    raise ValueError('`num_virtual_cpus` must be at least 1 not %r' %
                     (num_virtual_cpus,))

  cpu_devices = device_config.list_physical_devices('CPU')
  if not cpu_devices:
    raise RuntimeError('No CPUs found')

  first_cpu = cpu_devices[0]
  existing = device_config.get_virtual_device_configuration(first_cpu)
  if existing is not None:
    # Already configured: only complain when there are too few devices.
    if len(existing) < num_virtual_cpus:
      raise RuntimeError('Already configured with %d < %d virtual CPUs' %
                         (len(existing), num_virtual_cpus))
    return

  requested = [context.VirtualDeviceConfiguration()
               for _ in range(num_virtual_cpus)]
  device_config.set_virtual_device_configuration(first_cpu, requested)
def set_virtual_cpus_to_at_least(num_virtual_cpus):
    """Create virtual CPU devices if they haven't yet been created.

    An unconfigured first physical CPU is split into `num_virtual_cpus`
    virtual devices. An already-configured CPU is accepted as-is provided it
    has at least `num_virtual_cpus` virtual devices.

    Args:
        num_virtual_cpus: Minimum number of virtual CPUs; must be at least 1.

    Raises:
        ValueError: If `num_virtual_cpus` is less than 1.
        RuntimeError: If no physical CPU is found, or the existing
            configuration has fewer virtual devices than requested.
    """
    if num_virtual_cpus < 1:
        raise ValueError("`num_virtual_cpus` must be at least 1 not %r" %
                         (num_virtual_cpus, ))

    cpu_devices = config.list_physical_devices("CPU")
    if not cpu_devices:
        raise RuntimeError("No CPUs found")

    primary_cpu = cpu_devices[0]
    current = config.get_virtual_device_configuration(primary_cpu)
    if current is not None:
        # Keep whatever is configured unless it is too small.
        if len(current) < num_virtual_cpus:
            raise RuntimeError("Already configured with %d < %d virtual CPUs" %
                               (len(current), num_virtual_cpus))
        return

    wanted = [
        context.VirtualDeviceConfiguration() for _ in range(num_virtual_cpus)
    ]
    config.set_virtual_device_configuration(primary_cpu, wanted)
Exemplo n.º 16
0
  def testVirtualGpu(self):
    """Splits the last physical GPU into two virtual GPUs and places ops.

    Requires at least one physical GPU (asserted up front). Checks that the
    virtual configuration is stored, that the number of logical GPUs grows by
    one, that every logical GPU index accepts ops, and that the index one
    past the end is expected to raise a `RuntimeError` matching
    'unknown device'.
    """
    gpus = config.list_physical_devices('GPU')
    self.assertNotEqual(len(gpus), 0)

    self.assertIsNone(config.get_virtual_device_configuration(gpus[-1]))
    config.set_virtual_device_configuration(gpus[-1], [
        context.VirtualDeviceConfiguration(memory_limit=10),
        context.VirtualDeviceConfiguration(memory_limit=10)
    ])
    self.assertEqual(len(config.get_virtual_device_configuration(gpus[-1])), 2)

    logical_gpus = config.list_logical_devices('GPU')
    # One physical GPU was replaced by two virtual ones, so the logical count
    # is one more than the physical count. This must be assertEqual:
    # assertTrue would treat the second argument as the failure message and
    # never compare the two values.
    self.assertEqual(len(logical_gpus), len(gpus) + 1)
    for i in range(len(logical_gpus)):
      with ops.device('/device:GPU:' + str(i)):
        a = constant_op.constant(1.0)
        self.evaluate(a)

    # The index just past the last logical GPU must not be placeable.
    with self.assertRaisesRegexp(RuntimeError, 'unknown device'):
      with ops.device('/device:GPU:' + str(len(logical_gpus))):
        a = constant_op.constant(1.0)
        self.evaluate(a)
Exemplo n.º 17
0
  def testVirtualGpu(self):
    """Configures two virtual GPUs on the last physical GPU and checks them.

    Requires at least one physical GPU (asserted up front). Verifies that the
    configuration round-trips, that the logical GPU count is the physical
    count plus one, that each logical GPU index accepts ops, and that the
    first out-of-range index is expected to raise a `RuntimeError` matching
    'unknown device'.
    """
    gpus = config.list_physical_devices('GPU')
    self.assertNotEqual(len(gpus), 0)

    self.assertIsNone(config.get_virtual_device_configuration(gpus[-1]))
    config.set_virtual_device_configuration(gpus[-1], [
        context.VirtualDeviceConfiguration(memory_limit=10),
        context.VirtualDeviceConfiguration(memory_limit=10)
    ])
    self.assertEqual(len(config.get_virtual_device_configuration(gpus[-1])), 2)

    logical_gpus = config.list_logical_devices('GPU')
    # Use assertEqual here: with assertTrue the expected count would be taken
    # as the failure message and the check would pass for any non-zero
    # length. Splitting one GPU in two adds exactly one logical device.
    self.assertEqual(len(logical_gpus), len(gpus) + 1)
    for i in range(len(logical_gpus)):
      with ops.device('/device:GPU:' + str(i)):
        a = constant_op.constant(1.0)
        self.evaluate(a)

    # One past the last valid index must be rejected.
    with self.assertRaisesRegexp(RuntimeError, 'unknown device'):
      with ops.device('/device:GPU:' + str(len(logical_gpus))):
        a = constant_op.constant(1.0)
        self.evaluate(a)
Exemplo n.º 18
0
    def testCollectiveGroupSizeMismatch(self):
        """Expects an InternalError when participants disagree on group size.

        The single physical CPU is split into two virtual CPUs. Both join the
        same collective (same group and instance keys) but pass different
        `group_size` values.
        """
        physical_cpus = config.list_physical_devices('CPU')
        self.assertEqual(len(physical_cpus), 1)
        config.set_virtual_device_configuration(
            physical_cpus[0],
            [context.VirtualDeviceConfiguration() for _ in range(2)])
        context.ensure_initialized()

        @def_function.function
        def run_all_reduce():
            # Everything except group_size is shared by both participants.
            shared_kwargs = dict(group_key=10,
                                 instance_key=20,
                                 merge_op='Add',
                                 final_op='Id')
            with ops.device('/CPU:0'):
                first_input = constant_op.constant([1, 2, 3, 4])
                first_output = collective_ops.all_reduce(first_input,
                                                         group_size=2,
                                                         **shared_kwargs)
            with ops.device('/CPU:1'):
                second_input = constant_op.constant([5, 6, 7, 8])
                # Deliberately different group size to trigger the error.
                second_output = collective_ops.all_reduce(second_input,
                                                          group_size=3,
                                                          **shared_kwargs)
            return first_output, second_output

        with self.assertRaisesRegexp(errors.InternalError,
                                     'but that group has size'):
            run_all_reduce()