class CollectiveCommunicator(object):
    def __init__(self):
        if _FTLIB_INSTALLED:
            self._ftlib = BasicFTLib()
            self._ftlib.init(consensus="gossip", commlib="pytorch")
        else:
            logger.warning(
                "FTLib is not installed. The CollectiveCommunicator "
                "may not work as expected"
            )
            self._ftlib = None

    def allreduce(self, data, op="MEAN"):
        if data is None:
            logger.error("Data is required for allreduce operation")
            return CollectiveCommunicatorStatus.FAILED, data
        if op not in _SUPPORTED_ALLREDUCE_OPS:
            logger.error(
                "%s is not in list of supported allreduce operations: %s"
                % (op, _SUPPORTED_ALLREDUCE_OPS)
            )
            return CollectiveCommunicatorStatus.FAILED, data
        if self._ftlib is not None:
            res = self._ftlib.allreduce_average(data)
            if res == FTAllReduceStatus.SUCCESS:
                return CollectiveCommunicatorStatus.SUCCEEDED, data
            else:
                return CollectiveCommunicatorStatus.FAILED, data
        else:
            logger.warning(
                "FTLib is not installed. "
                "Default to succeeded for testing purposes"
            )
            return CollectiveCommunicatorStatus.SUCCEEDED, data

    def broadcast(self, data, root_ip):
        if self._ftlib is not None:
            res = self._ftlib.broadcast(data, root_ip)
            if res == FTAllReduceStatus.SUCCESS:
                return CollectiveCommunicatorStatus.SUCCEEDED, data
            else:
                return CollectiveCommunicatorStatus.FAILED, data
        else:
            logger.warning(
                "FTLib is not installed. "
                "Default to succeeded for testing purposes"
            )
            return CollectiveCommunicatorStatus.SUCCEEDED, data

    def barrier(self):
        return CollectiveCommunicatorStatus.SUCCEEDED

    def has_new_worker_joining(self):
        return True
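For reference, below is a minimal usage sketch of the class above, assuming FTLib is installed and that the payload passed to allreduce is a numpy array; the names communicator and grads are illustrative and not part of the original code.

import numpy as np

# Illustrative sketch only: average a local gradient array across workers
# and inspect the returned status (the payload type is an assumption here).
communicator = CollectiveCommunicator()
grads = np.ones((4, 4), dtype=np.float32)

status, averaged = communicator.allreduce(grads, op="MEAN")
if status == CollectiveCommunicatorStatus.SUCCEEDED:
    print("allreduce succeeded")
else:
    print("allreduce failed; falling back to local gradients")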
        x = self.fc1(x)
        x = F.relu(x)
        x = self.dropout2(x)
        x = self.fc2(x)
        output = F.log_softmax(x, dim=1)
        return output


if __name__ == '__main__':
    logging.info("start!")

    epochs = 1

    # initialize the fault-tolerant library with consensus and framework options
    ftlib = BasicFTLib()
    ftlib.init(consensus='shared_storage', framework='pytorch')

    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")
    model = Net().to(device)
    optimizer = optim.Adadelta(model.parameters(), lr=1.0)
    scheduler = StepLR(optimizer, step_size=1, gamma=0.7)

    for epoch in range(1, epochs + 1):
        model.train()
        for batch_idx, (data, target) in enumerate(train_loader):
            data, target = data.to(device), target.to(device)
            optimizer.zero_grad()
            output = model(data)
            loss = F.nll_loss(output, target)
            time.sleep(0.5)
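The training snippet above uses a train_loader that is not defined in the excerpt. Since the model and optimizer settings match the standard PyTorch MNIST example, a plausible loader is sketched below; treat it as an assumption rather than part of the original script.

# Assumed data loader (not shown in the excerpt): a standard torchvision
# MNIST pipeline consistent with the Net/Adadelta/StepLR setup above.
from torchvision import datasets, transforms

transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,)),
])
train_loader = torch.utils.data.DataLoader(
    datasets.MNIST("./data", train=True, download=True, transform=transform),
    batch_size=64,
    shuffle=True,
)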
def dummy_update():
    logging.info("dummy update")
    time.sleep(0.5)


if __name__ == '__main__':
    logging.info("start!")

    epochs = 1
    dl = dummy_dataloader(10)

    # initialize the fault-tolerant library with consensus and framework options
    ftlib = BasicFTLib()
    ftlib.init(consensus='shared_storage', framework='dummy_NCCL')

    for _ in range(epochs):
        for batch in dl:
            dummy_forward()
            dummy_backward()
            if ftlib.skip_allreduce:
                logging.info("skip allreduce")
                dummy_update()
                continue
            else:
                res = ftlib.wait_weights_ready()
                if res == FTAllReduceStatus.NO_NEED:
                    logging.critical(
                        "cannot use average_gradient when there is no need"
                    )
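The helpers dummy_dataloader, dummy_forward, and dummy_backward are referenced but not defined in this excerpt. A minimal sketch, assuming they are no-op stand-ins like dummy_update above, could be:

# Assumed no-op helpers (not shown in the excerpt); they only log and sleep,
# mirroring dummy_update() so the example exercises FTLib without real training.
def dummy_dataloader(num_batches):
    return range(num_batches)


def dummy_forward():
    logging.info("dummy forward")
    time.sleep(0.5)


def dummy_backward():
    logging.info("dummy backward")
    time.sleep(0.5)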