def __init__(self, config: Munch = None, **kwargs):
    if config is None:
        config = Miner.default_config()
    bittensor.config.Config.update_with_kwargs(config.miner, kwargs)
    Miner.check_config(config)
    self.config = config

    # ---- Model ----
    self.model = BertMLMSynapse(self.config)

    # ---- Optimizer ----
    self.optimizer = torch.optim.SGD(self.model.parameters(), lr=self.config.miner.learning_rate, momentum=self.config.miner.momentum)
    self.scheduler = WarmupCosineWithHardRestartsSchedule(self.optimizer, 50, 300)

    # ---- Model Load/Save tools ----
    self.model_toolbox = ModelToolbox(BertMLMSynapse, torch.optim.SGD)

    # ---- Dataset ----
    # Dataset: AG News corpus of news articles.
    self.dataset = load_dataset('ag_news')['train']

    # The collator accepts a list [ dict{'input_ids': ...} ] where the internal dict
    # is produced by the tokenizer.
    self.data_collator = DataCollatorForLanguageModeling(
        tokenizer=bittensor.__tokenizer__(), mlm=True, mlm_probability=0.15)

    super(Miner, self).__init__(self.config, **kwargs)
def train(self):
    self.training_loss = 0.0
    for local_step in range(self.config.miner.epoch_length):

        # ---- Forward pass ----
        inputs = nextbatch(self.dataset, self.config.miner.batch_size_train, bittensor.__tokenizer__())
        output = self.model.remote_forward(
            self.neuron,
            inputs.to(self.model.device),
            training=True,
        )

        # ---- Backward pass ----
        loss = output.local_target_loss + output.distillation_loss + output.remote_target_loss
        loss.backward()             # Accumulates gradients on the model.
        self.optimizer.step()       # Applies accumulated gradients.
        self.optimizer.zero_grad()  # Zeros out gradients for the next accumulation.

        # ---- Train row weights ----
        batch_weights = torch.mean(output.router.weights, axis=0).to(self.model.device)  # Average over batch.
        self.row = (1 - 0.03) * self.row + 0.03 * batch_weights                          # Moving average update.
        self.row = F.normalize(self.row, p=1, dim=0)                                     # Ensure normalization.

        # ---- Step logs ----
        logger.info(
            'GS: {} LS: {} Epoch: {}\tLocal Target Loss: {}\tRemote Target Loss: {}\tDistillation Loss: {}\tAxon: {}\tDendrite: {}',
            colored('{}'.format(self.global_step), 'red'),
            colored('{}'.format(local_step), 'blue'),
            colored('{}'.format(self.epoch), 'green'),
            colored('{:.4f}'.format(output.local_target_loss.item()), 'green'),
            colored('{:.4f}'.format(output.remote_target_loss.item()), 'blue'),
            colored('{:.4f}'.format(output.distillation_loss.item()), 'red'),
            self.neuron.axon,
            self.neuron.dendrite)
        logger.info('Codes: {}', output.router.return_codes.tolist())

        self.tensorboard.add_scalar('Neuron/Rloss', output.remote_target_loss.item(), self.global_step)
        self.tensorboard.add_scalar('Neuron/Lloss', output.local_target_loss.item(), self.global_step)
        self.tensorboard.add_scalar('Neuron/Dloss', output.distillation_loss.item(), self.global_step)

        # ---- Step increments ----
        self.global_step += 1
        self.training_loss += output.local_target_loss.item()

        # ---- Memory clean up ----
        torch.cuda.empty_cache()
        del output
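# `nextbatch` is not defined in this snippet. A minimal, hypothetical sketch of what
# such a helper might do, assuming it samples `batch_size` rows of raw text from the
# Hugging Face dataset, tokenizes them, and returns a padded tensor of token ids.
# The name, field access, and padding choices below are illustrative assumptions,
# not the actual bittensor implementation.
import random
import torch

def nextbatch_sketch(dataset, batch_size, tokenizer):
    # Sample `batch_size` random rows of raw text.
    rows = [dataset[random.randint(0, len(dataset) - 1)]['text'] for _ in range(batch_size)]
    # Tokenize with padding so all rows share one sequence length.
    batch = tokenizer(rows, padding=True, truncation=True, return_tensors='pt')
    return batch['input_ids']  # shape: [batch_size, sequence_length]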
def __init__(self, config: Munch):
    self.config = config

    # ---- Neuron ----
    self.neuron = Neuron(self.config)

    # ---- Model ----
    self.model = BertMLMSynapse(self.config)

    # ---- Optimizer ----
    self.optimizer = torch.optim.SGD(self.model.parameters(), lr=self.config.session.learning_rate, momentum=self.config.session.momentum)
    self.scheduler = WarmupCosineWithHardRestartsSchedule(self.optimizer, 50, 300)

    # ---- Dataset ----
    # Dataset: 74 million sentences pulled from books.
    self.dataset = load_dataset('bookcorpus')['train']

    # The collator accepts a list [ dict{'input_ids': ...} ] where the internal dict
    # is produced by the tokenizer.
    self.data_collator = DataCollatorForLanguageModeling(
        tokenizer=bittensor.__tokenizer__(), mlm=True, mlm_probability=0.15)

    # ---- Logging ----
    self.tensorboard = SummaryWriter(log_dir=self.config.session.full_path)
    if self.config.session.record_log:
        logger.add(
            self.config.session.full_path + "/{}_{}.log".format(
                self.config.session.name, self.config.session.trial_uid),
            format="{time:YYYY-MM-DD at HH:mm:ss} | {level} | {message}")
def __init__(self, config: Munch = None, **kwargs):
    if config is None:
        config = Miner.default_config()
    bittensor.config.Config.update_with_kwargs(config.miner, kwargs)
    Miner.check_config(config)
    self.config = config

    # ---- Neuron ----
    self.neuron = bittensor.neuron.Neuron(self.config)

    # ---- Model ----
    self.model = BertMLMSynapse(self.config)

    # ---- Optimizer ----
    self.optimizer = torch.optim.SGD(self.model.parameters(), lr=self.config.miner.learning_rate, momentum=self.config.miner.momentum)
    self.scheduler = WarmupCosineWithHardRestartsSchedule(self.optimizer, 50, 300)

    # ---- Model Load/Save tools ----
    self.model_toolbox = ModelToolbox(BertMLMSynapse, torch.optim.SGD)

    # ---- Dataset ----
    # Dataset: AG News corpus of news articles.
    self.dataset = load_dataset('ag_news')['train']

    # The collator accepts a list [ dict{'input_ids': ...} ] where the internal dict
    # is produced by the tokenizer.
    self.data_collator = DataCollatorForLanguageModeling(
        tokenizer=bittensor.__tokenizer__(), mlm=True, mlm_probability=0.15)

    # ---- Logging ----
    self.tensorboard = SummaryWriter(log_dir=self.config.miner.full_path)
    if self.config.miner.record_log:
        filepath = self.config.miner.full_path + "/{}_{}.log".format(
            self.config.miner.name, self.config.miner.trial_uid)
        logger.add(
            filepath,
            format="{time:YYYY-MM-DD at HH:mm:ss} | {level} | {message}",
            rotation="250 MB",
            retention="10 days")
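# Usage sketch for the collator above (illustrative only; it follows the comment in
# the constructor and the standard Hugging Face DataCollatorForLanguageModeling
# behavior): the tokenizer yields dicts containing 'input_ids', and the collator pads
# them into a batch and masks roughly 15% of tokens, returning masked 'input_ids'
# together with 'labels' holding the original ids at the masked positions.
tokenizer = bittensor.__tokenizer__()
collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=True, mlm_probability=0.15)
examples = [tokenizer(text) for text in ["the cat sat on the mat", "another short sentence"]]
mlm_batch = collator(examples)
masked_inputs, labels = mlm_batch['input_ids'], mlm_batch['labels']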
def __init__(self, batch_size, block_size, config=None):
    super(GenesisTextDataloader, self).__init__()
    assert batch_size > 0, 'Batch size must be larger than 0'
    assert block_size > 0, 'Block size must be larger than 0'

    if config is None:
        config = BittensorDataLoader.default_config()
    self.config = config
    self.block_size = block_size
    self.tokenizer = bittensor.__tokenizer__()
    self.batch_size = batch_size

    # Retrieve a random slice of the genesis dataset.
    self.data = self.construct_text_corpus()

    # Used to refresh the corpus if we've exhausted the whole dataset.
    self.refresh_corpus = False
def __init__(self, block_size: int, tokenizer=bittensor.__tokenizer__()):
    self.block_size = block_size
    self.tokenizer = tokenizer
    # Dataset: GLUE CoLA training split.
    self.lines = load_dataset('glue', 'cola')['train']
def __init__(self, block_size: int, tokenizer=bittensor.__tokenizer__()):
    self.block_size = block_size
    self.tokenizer = tokenizer
    # Dataset: WikiText-103 (raw) training split.
    self.lines = load_dataset('wikitext', 'wikitext-103-raw-v1')['train']
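# The __getitem__ of these dataset wrappers is not shown. An illustrative sketch,
# under the assumption that block_size and the tokenizer combine to produce
# fixed-length samples (standard Hugging Face tokenizer API; the helper name and
# column handling are assumptions, not the actual implementation):
def _example_item(lines, tokenizer, block_size, idx):
    # CoLA rows expose 'sentence', WikiText rows expose 'text'.
    column = 'text' if 'text' in lines.column_names else 'sentence'
    encoding = tokenizer(
        lines[idx][column],
        truncation=True,        # cut sequences longer than block_size
        max_length=block_size,
        padding='max_length',   # pad shorter sequences up to block_size
        return_tensors='pt')
    return encoding['input_ids'].squeeze(0)  # shape: [block_size]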