def __init__(self,
             model: torch.nn.Module,
             criterion: torch.nn.Module,
             optimizer: torch.optim.Optimizer,
             metrics: Dict[str, Metric],
             data_loaders: AttrDict,
             max_norm: float = None,
             norm_type: int = 2,
             scheduler: torch.optim.lr_scheduler._LRScheduler = None,
             is_iteration_scheduler: bool = False,
             device: torch.device = None,
             mixed_precision: bool = False,
             backup_path: str = None,
             name: str = 'trainer',
             logger: Logger = None,
             finished_epochs: int = 0):
    if mixed_precision:
        model = network_to_half(model)

    self.model = model
    self.criterion = criterion
    self.optimizer = optimizer
    self.metrics = metrics
    self.data_loaders = data_loaders
    self.max_norm = max_norm
    self.norm_type = norm_type
    self.scheduler = scheduler
    self.is_iteration_scheduler = is_iteration_scheduler
    self.device = device
    self.backup_path = backup_path
    self.finished_epochs = finished_epochs
    # resume bookkeeping: iterations already done in previous runs
    self.finished_iterations = finished_epochs * len(data_loaders.train)
    self.name = name
    self.logger = logger or get_logger()

    self._progress_bar = None
    self._description = 'ITERATION - loss: {:.3f}'
    self._trainer = create_supervised_trainer(model=model,
                                              optimizer=optimizer,
                                              loss_fn=criterion,
                                              max_norm=max_norm,
                                              norm_type=norm_type,
                                              device=device,
                                              mixed_precision=mixed_precision)
    self._register_handlers(self._trainer)
    self._evaluator = create_supervised_evaluator(model, metrics, device)
    self._epoch = 0
    self._iteration = 0
    self._train_loss = ExponentialMovingAverage()
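# For reference, a minimal sketch of what the custom create_supervised_trainer
# called above might look like: ignite's stock update loop plus optional
# gradient clipping. The mixed_precision branch is omitted and the repo's real
# helper may differ; this sketch is an illustrative assumption.
import torch
from ignite.engine import Engine


def create_supervised_trainer_sketch(model, optimizer, loss_fn, max_norm=None,
                                     norm_type=2, device=None):
    def _update(engine, batch):
        model.train()
        x, y = batch
        if device is not None:
            x, y = x.to(device), y.to(device)
        optimizer.zero_grad()
        loss = loss_fn(model(x), y)
        loss.backward()
        if max_norm is not None:
            # clip the global gradient norm before the optimizer step
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm,
                                           norm_type=norm_type)
        optimizer.step()
        return loss.item()

    return Engine(_update)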
import time
import datetime
import requests
from typing import Dict

from bot.service.notifications_service import send_notifications
from commons.constants import MIN_AGE, MIN_CAPACITY, districts, cowin_host, calender_api, cowin_request_headers, \
    DISTRICT_ITERATION_INTERVAL_SECONDS, DISTRICT_SKIP_INTERVAL, COMPLETE_ITERATION_INTERVAL_SECONDS, \
    calender_api_public
from commons.utils import get_logger

logger = get_logger(__name__)

# stores last fetched available slots for a session_id
slot_cache: Dict[str, int] = {}

# stores skip interval for district id
district_cache: Dict[str, int] = {}


def check_slots_in_response(response):
    found_new_free_slots = False
    centers = response.get("centers")
    for center in centers:
        for session in center.get('sessions'):
            session_id = session.get('session_id')
            available_capacity = session.get('available_capacity')
            min_age_limit = session.get('min_age_limit')
            date = session.get('date')
            # chained comparison: capacity must be at least MIN_CAPACITY and
            # must differ from the last value cached for this session
            if min_age_limit == MIN_AGE and MIN_CAPACITY <= available_capacity != slot_cache.get(session_id, -1):
                slot_cache[session_id] = available_capacity
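# Hypothetical polling loop illustrating how check_slots_in_response() could
# be driven. The URL shape and query parameters follow the public CoWIN
# calendarByDistrict API, and districts is assumed to iterate over district
# ids; this fetch_districts() driver is an illustrative assumption, not the
# repo's actual entry point.
def fetch_districts(date: str) -> None:
    while True:
        for district_id in districts:
            url = f'{cowin_host}{calender_api}'
            params = {'district_id': district_id, 'date': date}
            response = requests.get(url, params=params, headers=cowin_request_headers)
            if response.ok:
                check_slots_in_response(response.json())
            # pause between districts to stay under API rate limits
            time.sleep(DISTRICT_ITERATION_INTERVAL_SECONDS)
        time.sleep(COMPLETE_ITERATION_INTERVAL_SECONDS)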
for threshold in [0.5, 0.3, 0.2]:
    logger.info(f'Threshold: {threshold:.2f}')
    thresholds = np.zeros(HumanProteinDataset.NUM_CLASSES) + threshold
    utils.eval_thresholds(y_true_train, y_pred_train, thresholds, 'train')
    utils.eval_thresholds(y_true_valid, y_pred_valid, thresholds, 'valid')
    path = f'{config.submission_path}/{config.exp}_{int(10 * threshold):02}.csv'
    utils.save_submission(y_pred_test > threshold, y_true_test, path)


if __name__ == '__main__':
    remove_resource_limits()
    init_logger(f'{config.exp}_sz{config.image_size}_x{config.batch_size}',
                config.log_path, config.tensorboard_path)
    logger = get_logger()
    logger.info(f'PID: {os.getpid()}')

    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

    default_loaders, loaders = get_loaders(path=config.data_path,
                                           image_size=config.image_size,
                                           n_splits=config.k_fold,
                                           test_size=config.test_size,
                                           batch_size=config.batch_size,
                                           num_workers=config.num_workers,
                                           external=config.external_data,
                                           use_sampler=config.use_sampler)

    metrics = {
        'loss': Loss(FocalLoss()),
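# The FocalLoss wrapped by Loss(...) above is defined elsewhere in the repo;
# below is a minimal sketch of a multi-label (sigmoid) focal loss in the
# spirit of Lin et al. (2017), to show the interface being wrapped. The
# repo's own class may use different defaults or details.
import torch
import torch.nn.functional as F


class FocalLossSketch(torch.nn.Module):
    def __init__(self, gamma: float = 2.0):
        super().__init__()
        self.gamma = gamma

    def forward(self, logits: torch.Tensor, targets: torch.Tensor) -> torch.Tensor:
        # per-element BCE, then down-weight easy examples by (1 - p_t)^gamma
        bce = F.binary_cross_entropy_with_logits(logits, targets, reduction='none')
        p_t = torch.exp(-bce)  # probability the model assigns to the true label
        return ((1.0 - p_t) ** self.gamma * bce).mean()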
def train_model(name: str,
                model: torch.nn.Module,
                data_loaders: AttrDict,
                metrics: Dict,
                device: torch.device,
                lr: float,
                num_epochs: List[int],
                cycles_len: List[int],
                lr_divs: List[int],
                mixed_precision: bool = False,
                backup_path: str = None):
    fix_seed()
    logger = get_logger()
    num_batches = len(data_loaders.train)
    criterion = BCEWithLogitsLoss()

    # discriminative learning rates: earlier layer groups get smaller rates
    param_groups = [
        [model.conv1, model.bn1, model.layer1, model.layer2],
        [model.layer3, model.layer4],
        [model.last_linear]
    ]
    lrs = np.array([lr / 10, lr / 3, lr])
    logger.info(f'Learning rate: {lr:.5f}')
    logger.info(f'Learning rates: {lrs}')

    finished_epochs = 0
    for it, (epochs, lr_div, cycle_len) in enumerate(zip(num_epochs, lr_divs, cycles_len)):
        logger.info('Creating new trainer...')
        logger.info(f'Epochs: {epochs}')
        logger.info('Optimizer: Adam')
        if lr_div:
            optimizer = Adam(get_group_params(param_groups, lrs / lr_div), lr=lr)
            logger.info(f'Learning rate divider: {lr_div}')
        else:
            optimizer = Adam(model.parameters(), lr=lr)

        if cycle_len:
            scheduler = CircularLR(optimizer, cycle_len=cycle_len * num_batches, lr_div=10, cut_div=30)
            logger.info(f'Scheduler: {scheduler}')
        else:
            scheduler = None

        trainer = Trainer(name=name,
                          model=model,
                          criterion=criterion,
                          optimizer=optimizer,
                          metrics=metrics,
                          data_loaders=data_loaders,
                          max_norm=1,
                          scheduler=scheduler,
                          is_iteration_scheduler=True,
                          device=device,
                          mixed_precision=mixed_precision,
                          finished_epochs=finished_epochs)
        trainer.train(num_epochs=epochs)
        finished_epochs += epochs
        trainer.save_checkpoint(f'{backup_path}/checkpoint_{name}_{finished_epochs:02}.pth')
        unfreeze(model)
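# Hypothetical driver showing how train_model() fits together with the loop
# above. The pretrainedmodels factory, class count, and hyper-parameters are
# illustrative assumptions; only the attribute names (conv1, bn1, layer1..4,
# last_linear) are dictated by param_groups in train_model().
import pretrainedmodels
import torch

model = pretrainedmodels.resnet34(pretrained='imagenet')
model.last_linear = torch.nn.Linear(model.last_linear.in_features,
                                    HumanProteinDataset.NUM_CLASSES)

train_model(name='resnet34',
            model=model.to(device),
            data_loaders=default_loaders,    # assumed: AttrDict with a .train loader
            metrics=metrics,
            device=device,
            lr=1e-3,                         # illustrative schedule below
            num_epochs=[10, 20],
            cycles_len=[0, 20],
            lr_divs=[0, 10],
            mixed_precision=False,
            backup_path=config.backup_path)  # assumed config attribute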