def main():
    """Run a short distributed training job fed by named Mars tensors.

    Initialises a ``gloo`` process group, seeds torch with 42, wraps the
    tensors named ``'data'`` and ``'labels'`` in a ``MarsDataset`` served
    through a ``MarsDistributedSampler``, and trains the model returned by
    ``get_model()`` (wrapped in ``DistributedDataParallel``) for two epochs
    using SGD and binary cross-entropy loss.
    """
    import torch.nn as nn
    import torch.distributed as dist
    import torch.optim as optim
    import torch.utils.data
    import mars.tensor as mt
    from mars.learn.contrib.pytorch import MarsDataset, MarsDistributedSampler

    dist.init_process_group(backend='gloo')
    torch.manual_seed(42)

    # Named tensors are looked up without an explicit session argument.
    dataset = MarsDataset(mt.named_tensor(name='data'),
                          mt.named_tensor(name='labels'))
    sampler = MarsDistributedSampler(dataset)
    loader = torch.utils.data.DataLoader(dataset=dataset,
                                         batch_size=32,
                                         shuffle=False,
                                         sampler=sampler)

    net = nn.parallel.DistributedDataParallel(get_model())
    sgd = optim.SGD(net.parameters(), lr=0.01, momentum=0.5)
    loss_fn = nn.BCELoss()

    num_epochs = 2
    for _epoch in range(num_epochs):
        for features, targets in loader:
            predictions = net(features)
            batch_loss = loss_fn(predictions.squeeze(), targets)
            sgd.zero_grad()
            batch_loss.backward()
            sgd.step()
def testDistributedSampler(self, *_):
    """Check ``MarsDistributedSampler`` against a two-tensor ``MarsDataset``.

    Two random tensors are executed under the names ``'data1'`` and
    ``'data2'``, re-opened by name inside a ``DistributedContext`` and
    wrapped in a dataset.  For each of the two dataset columns, the result
    of ``dataset._get_data(indices)`` is compared with item-by-item access
    while iterating the sampler.  The sampler length and ``set_epoch`` are
    verified as well.
    """
    web_endpoint = 'http://127.0.0.1:' + self.web_port
    scheduler_endpoint = '127.0.0.1:' + self.scheduler_port

    with new_session(web_endpoint) as session:
        # Publish both source tensors under fixed names in this session.
        mt.tensor(np.random.rand(100, 200), chunk_size=40).execute(
            name='data1', session=session)
        mt.tensor(np.random.rand(100, ), chunk_size=60).execute(
            name='data2', session=session)

        with DistributedContext(scheduler_address=scheduler_endpoint,
                                session_id=session.session_id):
            named = [mt.named_tensor(name=key, session=session)
                     for key in ('data1', 'data2')]
            dataset = MarsDataset(*named)
            self.assertEqual(len(dataset), 100)

            sampler = MarsDistributedSampler(dataset, num_replicas=1, rank=0)
            indices = sampler.generate_indices()

            # Column 0 comes from 'data1', column 1 from 'data2'.
            for column in (0, 1):
                batched = np.array(dataset._get_data(indices)[column])
                one_by_one = np.array([dataset[idx][column] for idx in sampler])
                np.testing.assert_array_equal(batched, one_by_one)

            self.assertEqual(len(sampler), 100)

            sampler.set_epoch(1)
            self.assertEqual(sampler.epoch, 1)
def testNamed(self):
    """Round-trip a named tensor, series and dataframe through one session.

    Each object is executed with a ``name``, looked up again through the
    matching ``named_*`` constructor, and both its metadata (tensor order,
    series dtype/index, dataframe dtypes/index/columns) and its fetched
    contents are compared with the source object.
    """
    rng = np.random.RandomState(0)
    ndarray = rng.rand(10, 10)
    session = Session.default_or_local()

    # --- tensor ---------------------------------------------------------
    tensor_key = 't_name'
    executed = mt.tensor(ndarray, chunk_size=3).execute(
        name=tensor_key, session=session)
    np.testing.assert_array_equal(executed, ndarray)

    restored_tensor = mt.named_tensor(name=tensor_key, session=session)
    self.assertEqual(restored_tensor.order, TensorOrder.C_ORDER)
    shifted = (restored_tensor + 1).execute(session=session).fetch()
    np.testing.assert_array_equal(shifted, ndarray + 1)

    # --- series ---------------------------------------------------------
    series_key = 's_name'
    pd_series = pd.Series([1, 2, 3])
    mars_series = md.Series(pd_series)
    fetched_series = mars_series.execute(
        name=series_key, session=session).fetch()
    pd.testing.assert_series_equal(fetched_series, pd_series)

    restored_series = md.named_series(name=series_key, session=session)
    self.assertEqual(restored_series.dtype, mars_series.dtype)
    pd.testing.assert_index_equal(
        restored_series.index_value.to_pandas(),
        mars_series.index_value.to_pandas())
    fetched_series = restored_series.execute(session=session).fetch()
    pd.testing.assert_series_equal(fetched_series, pd_series)

    # --- dataframe ------------------------------------------------------
    frame_key = 'd_name'
    pd_frame = pd.DataFrame(np.random.rand(10, 3))
    mars_frame = md.DataFrame(pd_frame, chunk_size=4)
    fetched_frame = mars_frame.execute(
        name=frame_key, session=session).fetch()
    pd.testing.assert_frame_equal(fetched_frame, pd_frame)

    restored_frame = md.named_dataframe(name=frame_key, session=session)
    pd.testing.assert_series_equal(restored_frame.dtypes, mars_frame.dtypes)
    pd.testing.assert_index_equal(
        restored_frame.index_value.to_pandas(),
        mars_frame.index_value.to_pandas())
    pd.testing.assert_index_equal(
        restored_frame.columns_value.to_pandas(),
        mars_frame.columns_value.to_pandas())
    fetched_frame = restored_frame.execute(session=session).fetch()
    pd.testing.assert_frame_equal(fetched_frame, pd_frame)
def testDistributedDataset(self):
    """Check ``MarsDataset`` indexing against the source NumPy array.

    A random ``(100, 200)`` array is executed under the name ``'data'``,
    re-opened by name inside a ``DistributedContext`` and wrapped in a
    dataset.  Rows are then read two ways -- indexing with a whole index
    array, and item-by-item after ``prefetch`` -- and compared with the
    same rows of the original array.
    """
    web_endpoint = 'http://127.0.0.1:' + self.web_port
    scheduler_endpoint = '127.0.0.1:' + self.scheduler_port

    with new_session(web_endpoint) as session:
        expected = np.random.rand(100, 200)
        mt.tensor(expected, chunk_size=40).execute(name='data', session=session)

        with DistributedContext(scheduler_address=scheduler_endpoint,
                                session_id=session.session_id):
            dataset = MarsDataset(mt.named_tensor(name='data', session=session))
            self.assertEqual(len(dataset), 100)

            # Index the dataset with an array of row positions.
            picks = np.random.randint(0, 100, (10, ))
            batch = dataset[picks][0]
            np.testing.assert_array_equal(expected[picks], batch)

            # Prefetch a fresh set of positions, then read them one by one.
            picks = np.random.randint(0, 100, (10, ))
            dataset.prefetch(picks)
            rows = np.array([dataset[idx][0] for idx in picks])
            np.testing.assert_array_equal(expected[picks], rows)
def testNamed(self):
    """Round-trip a named tensor, series and dataframe through one session.

    Each object is executed with a ``name``, looked up again through the
    matching ``named_*`` constructor, and its fetched contents are compared
    with the source data.
    """
    rng = np.random.RandomState(0)
    ndarray = rng.rand(10, 10)
    session = Session.default_or_local()

    # Tensor: store under a name, then compute on the named handle.
    tensor_key = 't_name'
    source_tensor = mt.tensor(ndarray, chunk_size=3)
    executed = source_tensor.execute(name=tensor_key, session=session)
    np.testing.assert_array_equal(executed, ndarray)

    named_handle = mt.named_tensor(name=tensor_key, session=session)
    plus_one = (named_handle + 1).execute(session=session).fetch()
    np.testing.assert_array_equal(plus_one, ndarray + 1)

    # Series: store under a name, then fetch it back through the name.
    series_key = 's_name'
    pd_series = pd.Series([1, 2, 3])
    stored_series = md.Series(pd_series).execute(
        name=series_key, session=session).fetch()
    pd.testing.assert_series_equal(stored_series, pd_series)

    named_handle = md.named_series(name=series_key, session=session)
    reloaded_series = named_handle.execute(session=session).fetch()
    pd.testing.assert_series_equal(reloaded_series, pd_series)

    # DataFrame: store under a name, then fetch it back through the name.
    frame_key = 'd_name'
    pd_frame = pd.DataFrame(np.random.rand(10, 3))
    stored_frame = md.DataFrame(pd_frame, chunk_size=4).execute(
        name=frame_key, session=session).fetch()
    pd.testing.assert_frame_equal(stored_frame, pd_frame)

    named_handle = md.named_dataframe(name=frame_key, session=session)
    reloaded_frame = named_handle.execute(session=session).fetch()
    pd.testing.assert_frame_equal(reloaded_frame, pd_frame)
def f3():
    """Look up the tensor named ``'t_name'``, add one, and return the
    result of calling ``to_numpy()`` on the sum.

    No session is passed to ``named_tensor``.
    """
    import mars.tensor as mt

    named = mt.named_tensor(name='t_name')
    incremented = named + 1
    return incremented.to_numpy()