Example #1
    def __init__(self, config: Munch = None, **kwargs):
        if config is None:
            config = Miner.default_config()
        bittensor.config.Config.update_with_kwargs(config.miner, kwargs) 
        Miner.check_config(config)
        self.config = config

        # ---- Model ----
        self.model = BertMLMSynapse(self.config)

        # ---- Optimizer ----
        self.optimizer = torch.optim.SGD(self.model.parameters(),
                                         lr=self.config.miner.learning_rate,
                                         momentum=self.config.miner.momentum)
        self.scheduler = WarmupCosineWithHardRestartsSchedule(self.optimizer, 50, 300)

        # ---- Model Load/Save tools ----
        self.model_toolbox = ModelToolbox(BertMLMSynapse, torch.optim.SGD)

        # ---- Dataset ----
        # Dataset: 120,000 news articles from the AG News topic corpus.
        self.dataset = load_dataset('ag_news')['train']
        # The collator accepts a list of dicts, e.g. [ {'input_ids': ...} ],
        # where each dict is produced by the tokenizer.
        self.data_collator = DataCollatorForLanguageModeling(
            tokenizer=bittensor.__tokenizer__(), mlm=True, mlm_probability=0.15
        )
        super(Miner, self).__init__(self.config, **kwargs)
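A minimal standalone sketch of what the collator above does, using a stock Hugging Face BERT tokenizer as a stand-in for bittensor.__tokenizer__():

from transformers import BertTokenizerFast, DataCollatorForLanguageModeling

tokenizer = BertTokenizerFast.from_pretrained('bert-base-uncased')  # stand-in tokenizer
collator = DataCollatorForLanguageModeling(
    tokenizer=tokenizer, mlm=True, mlm_probability=0.15)

# The collator takes a list of tokenizer-produced dicts and returns a padded
# batch where ~15% of tokens are masked; 'labels' holds the original ids at
# masked positions and -100 everywhere else.
encoded = tokenizer(["the quick brown fox", "jumps over the lazy dog"])
features = [{'input_ids': ids} for ids in encoded['input_ids']]
batch = collator(features)
print(batch['input_ids'].shape, batch['labels'].shape)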
Example #2
    def train(self):
        self.training_loss = 0.0
        for local_step in range(self.config.miner.epoch_length):
            # ---- Forward pass ----
            inputs = nextbatch(self.dataset,
                               self.config.miner.batch_size_train,
                               bittensor.__tokenizer__())
            output = self.model.remote_forward(
                self.neuron,
                inputs.to(self.model.device),
                training=True,
            )

            # ---- Backward pass ----
            loss = output.local_target_loss + output.distillation_loss + output.remote_target_loss
            loss.backward()  # Accumulates gradients on the model.
            self.optimizer.step()  # Applies the accumulated gradients.
            self.optimizer.zero_grad()  # Zeros out gradients for the next accumulation.

            # ---- Train row weights ----
            batch_weights = torch.mean(output.router.weights, dim=0).to(
                self.model.device)  # Average over the batch.
            self.row = (1 - 0.03) * self.row + 0.03 * batch_weights  # Moving-average update.
            self.row = F.normalize(self.row, p=1, dim=0)  # Ensure normalization.

            # ---- Step logs ----
            logger.info(
                'GS: {} LS: {} Epoch: {}\tLocal Target Loss: {}\tRemote Target Loss: {}\tDistillation Loss: {}\tAxon: {}\tDendrite: {}',
                colored('{}'.format(self.global_step), 'red'),
                colored('{}'.format(local_step), 'blue'),
                colored('{}'.format(self.epoch), 'green'),
                colored('{:.4f}'.format(output.local_target_loss.item()), 'green'),
                colored('{:.4f}'.format(output.remote_target_loss.item()), 'blue'),
                colored('{:.4f}'.format(output.distillation_loss.item()), 'red'),
                self.neuron.axon,
                self.neuron.dendrite)
            logger.info('Codes: {}', output.router.return_codes.tolist())

            self.tensorboard.add_scalar('Neuron/Rloss',
                                        output.remote_target_loss.item(),
                                        self.global_step)
            self.tensorboard.add_scalar('Neuron/Lloss',
                                        output.local_target_loss.item(),
                                        self.global_step)
            self.tensorboard.add_scalar('Neuron/Dloss',
                                        output.distillation_loss.item(),
                                        self.global_step)

            # ---- Step increments ----
            self.global_step += 1
            self.training_loss += output.local_target_loss.item()

            # ---- Memory clean up ----
            del output  # Drop the reference before releasing cached blocks.
            torch.cuda.empty_cache()
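The row-weight update in the loop above is just an exponential moving average followed by an L1 re-normalization. A toy standalone version (the peer count and weights here are made up):

import torch
import torch.nn.functional as F

alpha = 0.03                                             # smoothing factor from train()
row = torch.full((5,), 1 / 5)                            # current weights over 5 peers
batch_weights = torch.tensor([0.4, 0.3, 0.1, 0.1, 0.1])  # mean router weights this step

row = (1 - alpha) * row + alpha * batch_weights          # exponential moving average
row = F.normalize(row, p=1, dim=0)                       # keep it a probability vector
print(row, row.sum())                                    # sums to 1.0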
Example #3
    def __init__(self, config: Munch):
        self.config = config

        # ---- Neuron ----
        self.neuron = Neuron(self.config)

        # ---- Model ----
        self.model = BertMLMSynapse(self.config)

        # ---- Optimizer ----
        self.optimizer = torch.optim.SGD(self.model.parameters(),
                                         lr=self.config.session.learning_rate,
                                         momentum=self.config.session.momentum)
        self.scheduler = WarmupCosineWithHardRestartsSchedule(
            self.optimizer, 50, 300)

        # ---- Dataset ----
        # Dataset: 74 million sentences pulled from books.
        self.dataset = load_dataset('bookcorpus')['train']
        # The collator accepts a list of dicts, e.g. [ {'input_ids': ...} ],
        # where each dict is produced by the tokenizer.
        self.data_collator = DataCollatorForLanguageModeling(
            tokenizer=bittensor.__tokenizer__(),
            mlm=True,
            mlm_probability=0.15)

        # ---- Logging ----
        self.tensorboard = SummaryWriter(log_dir=self.config.session.full_path)
        if self.config.session.record_log:
            logger.add(
                self.config.session.full_path + "/{}_{}.log".format(
                    self.config.session.name, self.config.session.trial_uid),
                format="{time:YYYY-MM-DD at HH:mm:ss} | {level} | {message}")
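A self-contained sketch of the loguru file sink configured above; the path here is made up, since the real one comes from the session config:

from loguru import logger

logger.add(
    "/tmp/example_session_trial0.log",  # hypothetical path
    format="{time:YYYY-MM-DD at HH:mm:ss} | {level} | {message}")
logger.info("messages now go to stderr and to the file sink")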
Example #4
    def __init__(self, config: Munch = None, **kwargs):
        if config is None:
            config = Miner.default_config()
        bittensor.config.Config.update_with_kwargs(config.miner, kwargs)
        Miner.check_config(config)
        self.config = config

        # ---- Neuron ----
        self.neuron = bittensor.neuron.Neuron(self.config)

        # ---- Model ----
        self.model = BertMLMSynapse(self.config)

        # ---- Optimizer ----
        self.optimizer = torch.optim.SGD(self.model.parameters(),
                                         lr=self.config.miner.learning_rate,
                                         momentum=self.config.miner.momentum)
        self.scheduler = WarmupCosineWithHardRestartsSchedule(
            self.optimizer, 50, 300)

        # ---- Model Load/Save tools ----
        self.model_toolbox = ModelToolbox(BertMLMSynapse, torch.optim.SGD)

        # ---- Dataset ----
        # Dataset: 120,000 news articles from the AG News topic corpus.
        self.dataset = load_dataset('ag_news')['train']
        # The collator accepts a list of dicts, e.g. [ {'input_ids': ...} ],
        # where each dict is produced by the tokenizer.
        self.data_collator = DataCollatorForLanguageModeling(
            tokenizer=bittensor.__tokenizer__(),
            mlm=True,
            mlm_probability=0.15)

        # ---- Logging ----
        self.tensorboard = SummaryWriter(log_dir=self.config.miner.full_path)
        if self.config.miner.record_log:
            filepath = self.config.miner.full_path + "/{}_{}.log".format(
                self.config.miner.name, self.config.miner.trial_uid)
            logger.add(
                filepath,
                format="{time:YYYY-MM-DD at HH:mm:ss} | {level} | {message}",
                rotation="250 MB",
                retention="10 days")
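WarmupCosineWithHardRestartsSchedule comes from an older transformers release; in recent versions the rough equivalent is a factory function. A hedged sketch of the same 50-step warmup / 300-step schedule, with a toy model standing in:

import torch
from transformers import get_cosine_with_hard_restarts_schedule_with_warmup

model = torch.nn.Linear(4, 4)  # toy model
optimizer = torch.optim.SGD(model.parameters(), lr=0.1, momentum=0.9)
scheduler = get_cosine_with_hard_restarts_schedule_with_warmup(
    optimizer, num_warmup_steps=50, num_training_steps=300)

for _ in range(300):
    optimizer.step()
    scheduler.step()  # anneal the learning rate, with hard restarts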
Example #5
    def __init__(self, batch_size, block_size, config=None):
        super(GenesisTextDataloader, self).__init__()

        assert batch_size > 0, 'Batch size must be greater than 0'
        assert block_size > 0, 'Block size must be greater than 0'

        if config is None:
            config = BittensorDataLoader.default_config()

        self.config = config
        self.block_size = block_size
        self.tokenizer = bittensor.__tokenizer__()
        self.batch_size = batch_size

        # Retrieve a random slice of the genesis dataset
        self.data = self.construct_text_corpus()

        # Used to refresh corpus if we've exhausted the whole dataset
        self.refresh_corpus = False
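Hypothetical construction of the dataloader above, assuming GenesisTextDataloader is importable, the genesis dataset is reachable, and construct_text_corpus() returns a sized container:

loader = GenesisTextDataloader(batch_size=32, block_size=128)
print(loader.tokenizer)       # the shared bittensor tokenizer
print(len(loader.data))       # size of the sampled corpus slice
print(loader.refresh_corpus)  # False until the slice is exhausted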
Example #6
    def __init__(self, block_size: int, tokenizer=bittensor.__tokenizer__()):
        self.block_size = block_size
        self.tokenizer = tokenizer
        self.lines = load_dataset('glue', 'cola')['train']
Example #7
    def __init__(self, block_size: int, tokenizer=bittensor.__tokenizer__()):
        self.block_size = block_size
        self.tokenizer = tokenizer

        self.lines = load_dataset('wikitext', 'wikitext-103-raw-v1')['train']
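Both of the last two datasets hold raw lines that later get clipped to block_size tokens. A minimal sketch of that step, with BertTokenizerFast standing in for bittensor.__tokenizer__():

from transformers import BertTokenizerFast

tokenizer = BertTokenizerFast.from_pretrained('bert-base-uncased')  # stand-in tokenizer
block_size = 16
line = "one raw line pulled from the corpus"
ids = tokenizer(line, truncation=True, max_length=block_size,
                padding='max_length')['input_ids']
print(len(ids))  # 16: every example is exactly block_size ids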