Example #1
import mxnet as mx
import logging
import imagenet

logging.basicConfig(level=logging.DEBUG)

# for single gpu
# kv = mx.kvstore.create('local')
# batch_size = 48
# devs = mx.gpu(0)

# dist_async - asynchronous SGD
# dist_sync  - synchronous (BSP) SGD
kv = mx.kvstore.create('dist_async')
# assume each worker has two gpus
devs = [mx.gpu(i) for i in range(2)]
batch_size = 96

(train, val) = imagenet.ilsvrc12(num_parts=kv.num_workers,
                                 part_index=kv.rank,
                                 batch_size=batch_size,
                                 input_shape=(3, 224, 224))

model = mx.model.FeedForward(ctx=devs,
                             symbol=imagenet.inception(1000),
                             num_epoch=20,
                             learning_rate=0.05,
                             momentum=0.9,
                             wd=0.00001)

model.fit(X=train,
          eval_data=val,
          kvstore=kv,
          batch_end_callback=mx.callback.Speedometer(batch_size, 10))
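
The imagenet.ilsvrc12 helper used above is not shown in these examples. As a rough sketch of what it likely wraps (the function below and the record-file paths are assumptions, not part of the original), mx.io.ImageRecordIter exposes num_parts/part_index arguments that shard the dataset so each worker reads a disjoint slice:

import mxnet as mx

def ilsvrc12_sketch(num_parts, part_index, batch_size, input_shape):
    """Hedged sketch of a sharded ImageNet iterator: one shard per worker."""
    train = mx.io.ImageRecordIter(
        path_imgrec='data/train.rec',   # assumed path to the packed training set
        data_shape=input_shape,
        batch_size=batch_size,
        shuffle=True,
        num_parts=num_parts,            # total number of shards (= kv.num_workers)
        part_index=part_index)          # this worker's shard (= kv.rank)
    val = mx.io.ImageRecordIter(
        path_imgrec='data/val.rec',     # assumed path to the packed validation set
        data_shape=input_shape,
        batch_size=batch_size,
        num_parts=num_parts,
        part_index=part_index)
    return (train, val)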
Example #2
import mxnet as mx
import logging
import imagenet

# kv_type = 'local'
## distributed version, can be dist_async or dist_sync
kv_type = 'dist_async'

## batch size for one gpu
batch_size_per_gpu = 40
## number of gpus used in a worker
num_gpus = 1
## learning rate
learning_rate = 0.05
## path to the ImageNet record files (placeholder; set to your own data directory)
data_dir = 'data/ilsvrc12/'

batch_size = batch_size_per_gpu * num_gpus
kv = mx.kvstore.create(kv_type)

(train, val) = imagenet.ilsvrc12(data_dir=data_dir,
                                 num_parts=kv.num_workers,
                                 part_index=kv.rank,
                                 batch_size=batch_size)

logging.basicConfig(level=logging.DEBUG)

model = mx.model.FeedForward(ctx=[mx.gpu(i) for i in range(num_gpus)],
                             symbol=imagenet.inception(1000),
                             num_epoch=20,
                             epoch_size=1281167 // batch_size // kv.num_workers,
                             learning_rate=learning_rate,
                             momentum=0.9,
                             wd=0.00001)

model.fit(X=train,
          eval_data=val,
          kvstore=kv)
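
A quick sanity check on the numbers (a sketch; the worker count below is an assumption for illustration): 1,281,167 is the ILSVRC12 training-set size, epoch_size is the number of batches each worker processes per epoch, and with dist_sync each update aggregates gradients from every worker's batch.

num_workers = 8                            # assumption: 8 single-GPU workers
epoch_size = 1281167 // 40 // num_workers  # 4003 batches per worker per epoch
global_batch = 40 * num_workers            # 320 examples per dist_sync update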
Example #3
#!/usr/bin/env python
import mxnet as mx
import logging
import imagenet

logging.basicConfig(level=logging.DEBUG)

kv = mx.kvstore.create("dist_sync")

batch_size = 96
(train, val) = imagenet.ilsvrc12(
    num_parts=kv.num_workers, part_index=kv.rank, batch_size=batch_size, input_shape=(3, 224, 224)
)

# assume each worker has two gpus
devs = [mx.gpu(i) for i in range(2)]

model = mx.model.FeedForward(
    ctx=devs, symbol=imagenet.inception(1000), num_epoch=20, learning_rate=0.05, momentum=0.9, wd=0.00001
)

model.fit(X=train, eval_data=val, kvstore=kv, batch_end_callback=mx.callback.Speedometer(batch_size, 5))
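
These scripts do not start the cluster themselves: the 'dist_*' kvstores expect the ps-lite environment variables (DMLC_ROLE, DMLC_PS_ROOT_URI, DMLC_PS_ROOT_PORT, DMLC_NUM_SERVER, DMLC_NUM_WORKER) to be set before mx.kvstore.create is called, which MXNet's tools/launch.py normally does for you. A minimal sketch of doing it by hand on one machine (the script name train_imagenet.py is hypothetical):

import os
import subprocess
import sys

script = 'train_imagenet.py'           # hypothetical: the example above saved to a file
common = {
    'DMLC_PS_ROOT_URI': '127.0.0.1',   # scheduler address; single host assumed
    'DMLC_PS_ROOT_PORT': '9091',
    'DMLC_NUM_SERVER': '1',
    'DMLC_NUM_WORKER': '2',
}

# launch one scheduler, one server, and two workers as child processes
procs = []
for role in ['scheduler', 'server', 'worker', 'worker']:
    env = dict(os.environ, DMLC_ROLE=role, **common)
    procs.append(subprocess.Popen([sys.executable, script], env=env))
for p in procs:
    p.wait()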
Example #4
import mxnet as mx
import logging
import imagenet

## distributed version, can be dist_async or dist_sync
kv_type = 'dist_async'

## batch size for one gpu
batch_size_per_gpu = 40
## number of gpus used in a worker
num_gpus = 1
## learning rate
learning_rate = 0.05
## path to the ImageNet record files (placeholder; set to your own data directory)
data_dir = 'data/ilsvrc12/'

batch_size = batch_size_per_gpu * num_gpus
kv = mx.kvstore.create(kv_type)

(train, val) = imagenet.ilsvrc12(
    data_dir = data_dir,
    num_parts = kv.num_workers,
    part_index = kv.rank,
    batch_size = batch_size)

logging.basicConfig(level=logging.DEBUG)

model = mx.model.FeedForward(
    ctx           = [mx.gpu(i) for i in range(num_gpus)],
    symbol        = imagenet.inception(1000),
    num_epoch     = 20,
    epoch_size    = 1281167 // batch_size // kv.num_workers,
    learning_rate = learning_rate,
    momentum      = 0.9,
    wd            = 0.00001)

model.fit(X         = train,
          eval_data = val,
          kvstore   = kv)
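
None of the examples save parameters. A common pattern (a sketch, not part of the original code) is to checkpoint from worker 0 only, so ranks do not overwrite each other's files; mx.callback.do_checkpoint returns an epoch-end callback that writes the symbol and parameters under the given prefix:

# hedged sketch: checkpoint only on the rank-0 worker
checkpoint = mx.callback.do_checkpoint('inception') if kv.rank == 0 else None
model.fit(X=train,
          eval_data=val,
          kvstore=kv,
          epoch_end_callback=checkpoint)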