train_length = int(len(all_data) * args.train_ratio) val_length = len(all_data) - train_length if args.validation_split_seed is not None: torch.manual_seed(args.validation_split_seed) train_data, val_data = torch.utils.data.random_split( all_data, [train_length, val_length]) if args.seed is not None: torch.manual_seed(args.seed) elif args.validation_split_seed is not None: torch.manual_seed(torch.initial_seed()) train_loader = get_variable_length_protein_dataLoader( train_data, batch_size=args.batch_size, shuffle=True, use_weights=args.use_weights) val_loader = get_variable_length_protein_dataLoader( val_data, batch_size=args.batch_size, use_weights=args.use_weights) print("Data loaded!") model = WaveNet(input_channels=NUM_TOKENS, residual_channels=args.residual_channels, out_channels=NUM_TOKENS, stacks=args.stacks, layers_per_stack=args.layers, total_samples=train_length, l2_lambda=args.L2, bias=args.bias,
if args.seed is not None: torch.manual_seed(args.seed) print(f"Random seed set to {args.seed}") data_device = torch.device(args.device) if args.multi_gpu: data_device = torch.device("cpu") # Load data train_data = IterProteinDataset(args.train_data, device = data_device) validation_data = IterProteinDataset(args.validation_data, device = data_device) val_len = len(validation_data) train_seqs_per_epoch = val_len * 9 train_loader = get_variable_length_protein_dataLoader(train_data, batch_size = args.batch_size) val_loader = get_variable_length_protein_dataLoader(validation_data, batch_size = args.batch_size) print("Data loaded!") total_samples = 39_069_211 # magic number model = WaveNet( input_channels = NUM_TOKENS, residual_channels = args.residual_channels, out_channels = NUM_TOKENS, stacks = args.stacks, layers_per_stack = args.layers, total_samples = total_samples, l2_lambda = args.L2, bias = args.bias, dropout = args.dropout,