Exemplo n.º 1
0
from common import TestCase


TCP_ADDR = '127.0.0.1'
TCP_PORT = 29500

TIMEOUT_DEFAULT = 5
TIMEOUT_OVERRIDE = {}


def get_timeout(test_id):
    return TIMEOUT_OVERRIDE.get(test_id.split('.')[-1], TIMEOUT_DEFAULT)


if not c10d.is_available():
    print('c10d not available, skipping tests')
    sys.exit(0)


class StoreTestBase(object):
    def _create_store(self, i):
        raise RuntimeError("not implemented")

    def _test_set_get(self, fs):
        fs.set("key0", "value0")
        fs.set("key1", "value1")
        fs.set("key2", "value2")
        self.assertEqual(b"value0", fs.get("key0"))
        self.assertEqual(b"value1", fs.get("key1"))
        self.assertEqual(b"value2", fs.get("key2"))
Exemplo n.º 2
0
DEFAULT_TIMEOUT = 300
CUSTOMIZED_TIMEOUT = {"test_DistributedDataParallel": 500}

if INIT_METHOD.startswith("file://"):
    FOLDER = INIT_METHOD[7:]


def get_timeout(test_id):
    test_name = test_id.split(".")[-1]
    if test_name in CUSTOMIZED_TIMEOUT:
        return CUSTOMIZED_TIMEOUT[test_name]
    else:
        return DEFAULT_TIMEOUT


if not dist.is_available():
    print("Distributed not available, skipping tests")
    sys.exit(0)

SKIP_IF_NO_CUDA_EXIT_CODE = 75
SKIP_IF_NO_GPU_EXIT_CODE = 76
SKIP_IF_SMALL_WORLDSIZE_EXIT_CODE = 77
SKIP_IF_BACKEND_UNAVAILABLE = 78


def skip_if_no_cuda_distributed(func):
    func.skip_if_no_cuda_distributed = True

    @wraps(func)
    def wrapper(*args, **kwargs):
        if not torch.cuda.is_available():
Exemplo n.º 3
0
from common import TestCase


TCP_ADDR = '127.0.0.1'
TCP_PORT = 29500

TIMEOUT_DEFAULT = 5
TIMEOUT_OVERRIDE = {}


def get_timeout(test_id):
    return TIMEOUT_OVERRIDE.get(test_id.split('.')[-1], TIMEOUT_DEFAULT)


if not c10d.is_available():
    print('c10d not available, skipping tests')
    sys.exit(0)


class StoreTestBase(object):
    def _create_store(self, i):
        raise RuntimeError("not implemented")

    def _test_set_get(self, fs):
        fs.set("key0", "value0")
        fs.set("key1", "value1")
        fs.set("key2", "value2")
        self.assertEqual(b"value0", fs.get("key0"))
        self.assertEqual(b"value1", fs.get("key1"))
        self.assertEqual(b"value2", fs.get("key2"))
Exemplo n.º 4
0
    'cpp_extensions',
    'distributed',
    'distributions',
    'multiprocessing',
    'nccl',
    'thd_distributed',
    'utils',
]

DISTRIBUTED_TESTS_CONFIG = {
    'gloo': {
        'WORLD_SIZE': '2' if torch.cuda.device_count() == 2 else '3'
    },
}

if c10d.is_available():
    if c10d.is_mpi_available():
        DISTRIBUTED_TESTS_CONFIG['mpi'] = {'WORLD_SIZE': '3'}
    if c10d.is_nccl_available():
        DISTRIBUTED_TESTS_CONFIG['nccl'] = {
            'WORLD_SIZE': '2' if torch.cuda.device_count() == 2 else '3'
        }

THD_DISTRIBUTED_TESTS_CONFIG = {
    'tcp': {
        'WORLD_SIZE': '3'
    },
    'gloo': {
        'WORLD_SIZE': '2' if torch.cuda.device_count() == 2 else '3'
    },
    # THD NCCL and MPI tests are known to be flaky in CI