dataset = BinaySeqDataset(args)
dataloader = DataLoader(dataset, batch_size=1, shuffle=True, num_workers=4)

model = NTM(M=args.memory_capacity,
            N=args.memory_vector_size,
            input_size=args.token_size,
            output_size=args.token_size,
            controller_out_dim=args.controller_output_dim,
            controller_hid_dim=args.controller_hidden_dim)
print(model)

criterion = torch.nn.BCELoss()
optimizer = torch.optim.RMSprop(model.parameters(), lr=args.learning_rate)

print("--------- Number of parameters -----------")
print(model.calculate_num_params())
print("--------- Start training -----------")

losses = []

if args.loadmodel != '':
    model.load_state_dict(torch.load(args.loadmodel))

for e, (X, Y) in enumerate(dataloader):
    tmp = time()
    model.initalize_state()
    optimizer.zero_grad()
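    # A minimal sketch of how this training step might continue. The
    # per-timestep model(x) write call, the parameterless model() read call,
    # and the gradient-clipping value are assumptions about this NTM's
    # interface, not taken from the original script.
    inp_seq_len = X.size(1)                 # assumed layout: (batch, time, token_size)
    out_seq_len = Y.size(1)

    for t in range(inp_seq_len):            # write phase: present the input sequence
        model(X[:, t, :])

    y_pred = torch.zeros(Y.size())
    for t in range(out_seq_len):            # read phase: emit one token per step
        y_pred[:, t, :] = model()

    loss = criterion(y_pred, Y)
    loss.backward()
    torch.nn.utils.clip_grad_norm_(model.parameters(), 10)
    optimizer.step()
    losses.append(loss.item())
    print(f"Sequence {e}, loss {loss.item():.4f}, time {time() - tmp:.2f}s")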
# For the Priority Sort task, input_size: seq_width + 1, output_size: seq_width
ntm = NTM(input_size=task_params['seq_width'] + 1,
          output_size=task_params['seq_width'],
          controller_size=task_params['controller_size'],
          memory_units=task_params['memory_units'],
          memory_unit_size=task_params['memory_unit_size'],
          num_heads=task_params['num_heads'],
          multi_layer_controller=task_params['multi_layer_controller'])

if args.load_model != "":
    ntm.load_state_dict(torch.load(args.load_model))

criterion = nn.BCELoss()
# As the learning rate is task-specific, the argument can be moved to the json file.
optimizer = optim.RMSprop(ntm.parameters(),
                          lr=args.lr,
                          alpha=args.alpha,
                          momentum=args.momentum)
'''
optimizer = optim.Adam(ntm.parameters(), lr=args.lr,
                       betas=(args.beta1, args.beta2))
'''
'''
args.saved_model = 'saved_model_copy.pt'
args.saved_model = 'saved_model_repeatcopy.pt'
args.saved_model = 'saved_model_associative.pt'
args.saved_model = 'saved_model_ngram.pt'
args.saved_model = 'saved_model_prioritysort.pt'
'''
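# For reference, a hypothetical task_params dict for the Priority Sort run
# above. It would normally be loaded from the task's json file (e.g.
# task_params = json.load(open(args.task_json)), where args.task_json is an
# assumed flag name) before the NTM construction; the keys mirror that
# constructor call, and the values here are illustrative only.
task_params = {
    'seq_width': 8,
    'controller_size': 100,
    'memory_units': 128,
    'memory_unit_size': 20,
    'num_heads': 5,
    'multi_layer_controller': False,
}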
# For the Reverse task, input_size: seq_width + 2, output_size: seq_width
ntm = NTM(input_size=task_params['seq_width'] + 2,
          output_size=task_params['seq_width'],
          controller_size=task_params['controller_size'],
          memory_units=task_params['memory_units'],
          memory_unit_size=task_params['memory_unit_size'],
          num_heads=task_params['num_heads'])

criterion = nn.BCELoss()
# As the learning rate is task-specific, the argument can be moved to the json file.
# optimizer = optim.RMSprop(ntm.parameters(),
#                           lr=args.lr,
#                           alpha=args.alpha,
#                           momentum=args.momentum)
optimizer = optim.Adam(ntm.parameters(), lr=args.lr,
                       betas=(args.beta1, args.beta2))

# ----------------------------------------------------------------------------
# -- basic training loop
# ----------------------------------------------------------------------------
losses = []
losses2 = []
losses3 = []
errors = []

for iter in tqdm(range(args.num_iters)):
    optimizer.zero_grad()
    ntm.reset()
    data = dataset[iter]
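    # A minimal sketch of the rest of this iteration. The dataset item keys
    # ('input', 'target'), the per-timestep ntm(x) call, and the clipping
    # value are assumptions about the surrounding code.
    in_seq, target = data['input'], data['target']

    out = torch.zeros(target.size())
    for t in range(in_seq.size(0)):          # present the input sequence
        ntm(in_seq[t].unsqueeze(0))
    zero_in = torch.zeros(1, in_seq.size(1))
    for t in range(target.size(0)):          # read the reversed sequence back
        out[t] = ntm(zero_in)

    loss = criterion(out, target)
    losses.append(loss.item())
    loss.backward()
    nn.utils.clip_grad_value_(ntm.parameters(), 10)
    optimizer.step()

    # bit-error count between the thresholded prediction and the target
    out_bin = (out.detach() > 0.5).float()
    errors.append(torch.sum(torch.abs(out_bin - target)).item())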
    model = MARNN(marnn_config, input_size=input_size,
                  num_units=marnn_config.lstm_size,
                  output_size=output_size,
                  use_zoneout=False, use_ln=False)
else:
    has_tau = 1
    marnn_config = args
    print('marnn_config:\n', marnn_config)
    model = MARNN(marnn_config, input_size=input_size,
                  num_units=marnn_config.lstm_size,
                  output_size=output_size,
                  use_zoneout=False, use_ln=False)

params = 0
for p in model.parameters():
    params += p.numel()
print('Number of parameters:', params)

criterion = nn.BCELoss()
# As the learning rate is task-specific, the argument can be moved to the json file.
if args.optim == 'rmsp':
    optimizer = optim.RMSprop(model.parameters(),
                              lr=args.lr,
                              alpha=args.alpha,
                              momentum=args.momentum)
# else:
#     optimizer = optim.Adam(model.parameters(), lr=args.lr, eps=1e-5)

# args.saved_model = 'saved_model_copy.pt'
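# As written above, any value of --optim other than 'rmsp' leaves the optimizer
# undefined; a minimal sketch of the selection with the commented-out Adam
# branch re-enabled (eps value taken from that comment).
if args.optim == 'rmsp':
    optimizer = optim.RMSprop(model.parameters(),
                              lr=args.lr,
                              alpha=args.alpha,
                              momentum=args.momentum)
else:
    optimizer = optim.Adam(model.parameters(), lr=args.lr, eps=1e-5)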
model = NTM(M=args.memory_capacity,
            N=args.memory_vector_size,
            num_inputs=args.input_size,
            num_outputs=args.output_size,
            function_vector_size=args.function_size,
            max_program_length=args.max_program_length,
            controller_dim=args.controller_dim,
            input_embedding=dataset.input_embedding,
            output_embedding=dataset.output_embedding)
print(model)

criterion = torch.nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=args.learning_rate)

print("--------- Number of parameters -----------")
print(model.calculate_num_params())
print("--------- Start training -----------")

losses = []

if args.loadmodel != '':
    model.load_state_dict(torch.load(args.loadmodel))
    getPrograms(model)

for e, (X, program, Y) in enumerate(dataloader):
    tmp = time()
    model.initalize_state()
    optimizer.zero_grad()
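    # A minimal sketch of one training step for this program-induction setup.
    # The order in which the program and input tokens are presented, and the
    # parameterless model() read call, are assumptions rather than the
    # script's actual interface.
    for t in range(program.size(1)):         # assumed layout: (batch, time, function_size)
        model(program[:, t, :])
    for t in range(X.size(1)):
        model(X[:, t, :])

    y_pred = torch.zeros(Y.size())
    for t in range(Y.size(1)):
        y_pred[:, t, :] = model()

    loss = criterion(y_pred, Y)              # MSE over the embedded output tokens
    loss.backward()
    optimizer.step()
    losses.append(loss.item())
    print(f"Sequence {e}, loss {loss.item():.4f}, time {time() - tmp:.2f}s")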
                 cuda=cuda)
input_zero = Variable(torch.zeros(batch_size, dim + 1))

ntm = NTM(N, M, dim + 1, dim, batch_size=batch_size, lstm=lstm)
ntm.reset()
print(f"Model size: {model_size(ntm)}")

if cuda:
    print("Using cuda.")
    input_zero = input_zero.cuda()
    ntm = ntm.cuda()

criterion = torch.nn.BCEWithLogitsLoss()
if not RMSprop:
    opt = torch.optim.Adam(ntm.parameters(), lr=lr)
if RMSprop:
    opt = torch.optim.RMSprop(ntm.parameters(), lr=lr, momentum=.9, centered=True)

nb_samples = 0
for step, inp, out in seqgen:
    nb_samples += batch_size
    ntm.reset()
    loss = 0
    acc = 0
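    # A minimal sketch of how this loop body might continue; the per-timestep
    # ntm(x) call returning logits and the logging interval are assumptions
    # about this model's interface.
    for t in range(inp.size(0)):             # write phase: present the sequence
        ntm(inp[t])
    for t in range(out.size(0)):             # read phase: feed the zero token
        logits = ntm(input_zero)
        loss += criterion(logits, out[t])
        acc += ((torch.sigmoid(logits) > 0.5).float() == out[t]).float().mean().item()

    opt.zero_grad()
    loss.backward()
    opt.step()

    if step % 100 == 0:
        print(f"step {step}: loss {loss.item() / out.size(0):.4f}, "
              f"bit acc {acc / out.size(0):.3f}")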