def on_epoch_end(self, epoch, logs=None): # send message of epoch end message = job_name + ' epoch_end' send_signal.send(args.node, 10002, message)
batch_size = 32 args_lr = 0.002 args_model = 'mnasnet' epoch_begin_time = 0 job_name = sys.argv[0].split('.')[0] save_files = '/scratch/li.baol/checkpoint_unaware/' + job_name + '*' total_epochs = 19 starting_epoch = 0 # first step is to update the PID pid = os.getpid() message = job_name + ' pid ' + str(pid) # 'job50 pid 3333' send_signal.send(args.node, 10002, message) if args.resume: save_file = glob.glob(save_files)[0] # epochs = int(save_file.split('/')[4].split('_')[1].split('.')[0]) starting_epoch = int(save_file.split('/')[4].split('.')[0].split('_')[-1]) data_augmentation = True num_classes = 10 # Subtracting pixel mean improves accuracy subtract_pixel_mean = True n = 3 # Model name, depth and version
def on_epoch_end(self, epoch, logs=None): if epoch == starting_epoch and args.resume: first_epoch_time = int(time.time() - first_epoch_start) message = job_name + ' 1st_epoch ' + str(first_epoch_time) send_signal.send(args.node, 10002, message)