# --- Hyperparameters and bookkeeping for the DQN training script -----------
# The meanings noted as "presumably" are inferred from the names only;
# confirm them against the code that consumes each value.
UPDATE_TARGET_N = 10   # presumably: how often the target network is synced
BATCH_SIZE = 128       # presumably: transitions sampled per optimisation step
RUNS = 1500            # number of iterations of the run loop below
STEPS = 500            # presumably: maximum environment steps per run
R_MEMORY = ReplayMemory(10000)
MIN_EPSILON = 0.04     # presumably: lower bound for the exploration rate
successes = 0
GAMMA = 0.99           # presumably: reward discount factor
epsilon = 0.4          # presumably: initial exploration rate
l_r = 0.001            # learning rate handed to the optimizer

# Fixed: the original assigned the bound method (`env.reset`, no call), so
# `state_0` was not an observation until the run loop overwrote it.
state_0 = env.reset()

# Policy network plus a second network initialised with the same weights.
# The second network is switched to eval mode here and is not passed to the
# optimizer below.
p_network = NN()
target_network = NN()
target_network.load_state_dict(p_network.state_dict())
target_network.eval()

loss_function = nn.MSELoss()  # mean squared error
optimizer = optim.SGD(p_network.parameters(), lr=l_r)

# StepLR with step_size=1 multiplies the learning rate by `gamma` (0.9) on
# every scheduler.step() call; the original comment says it is stepped once
# per run.
# NOTE(review): at that rate the learning rate decays towards zero long
# before RUNS is reached - confirm this is intended.
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=1, gamma=0.9)
steps_history = []

# Turn Experience Replay on or off here.
Experience_Replay = False

# The loop body may continue beyond the portion of the file visible here.
for run in trange(RUNS):
    state_0 = env.reset()
# Load all the data: every file under ./data whose name contains "dataset"
# is parsed as JSON and its 'X' / 'y' lists are appended to `data`.
for root, dirs, files in os.walk("./data", topdown=False):
    for name in files:
        if "dataset" not in name:
            continue
        with open(os.path.join(root, name), 'r') as infile:
            d = json.load(infile)
        data['X'] += d['X']
        data['y'] += d['y']

if len(sys.argv) > 1 and sys.argv[1] == 'test':
    # "test" argument given: evaluate the saved weights on the whole dataset.
    print("Testing network.pt...")
    net_dims = (len(data['X'][0]) - 1, len(data['y'][0]))
    network = NN(*net_dims)
    network.load_state_dict(torch.load("./network.pt"))  # weights from network.pt
    network.eval()
    g_predict = test(data, network)
    print(g_predict * 100, "%")  # result printed as a percentage
else:
    # No "test" argument: set up a fresh network for training.
    print("Training a new network...")
    net_dims = (len(data['X'][0]) - 1, len(data['y'][0]))
    network = NN(*net_dims)
    optimizer = torch.optim.Adam(network.parameters(), lr=1e-6)  # optimizer
    loss = nn.CrossEntropyLoss()  # loss function
    data_test = {'X': [], 'y': []}
# NOTE(review): this fragment begins inside loops whose headers are not
# visible here (`i` is printed as the epoch, `batch_idx` as the batch), and
# the original indentation was lost. The statements are laid out flat; confirm
# how each one nests under the enclosing epoch / batch loops before merging.

# Forward pass: the model returns a prediction and two further values that
# are handed to the loss together with the targets.
y_hat, z_all, z_context = model(x_context, y_context, x_all, y_all)
loss = np_loss(y_hat, y_all, z_all, z_context)
loss.backward()
train_loss += loss.item()  # running sum of the per-batch loss values
# NOTE(review): no optimizer.zero_grad() is visible in this fragment -
# presumably it is called earlier in the loop body; verify.
optimizer.step()
# Progress report on every 10th batch.
if batch_idx % 10 == 0:
    print('Epoch: ', i)
    print('Batch: ', batch_idx)
    print('loss: ', loss.item())
# Checkpoint: once (i + 1) exceeds 100, save the weights whenever (i + 1) is
# a multiple of 20, so the first file written is weight/epoch_120.pkl.
if (i + 1) > 100 and (i + 1) % 20 == 0:
    weight_name = 'weight/epoch_%d.pkl' % (i + 1)
    torch.save(model.state_dict(), weight_name)
# test
model.eval()
test_loss = 0
with torch.no_grad():  # no gradient tracking inside this block
    for batch_idx in range(num_test):
        # BATCH_SIZE distinct row indices drawn from 1 .. num_train - 1
        # (index 0 is never drawn).
        # NOTE(review): this section is labelled "test" but indexes
        # colorImg_train - confirm that is intended.
        batch_order = random.sample(range(1, num_train), BATCH_SIZE)
        y_all = colorImg_train[batch_order, :]
        y_all = y_all.cuda()
        y_all = torch.reshape(y_all, (BATCH_SIZE, -1, 1))
        N = 300  # presumably the number of context points - TODO confirm
        context_idx = get_context_idx(N)
        x_context = idx_to_x(context_idx, BATCH_SIZE)
        y_context = idx_to_y(context_idx, y_all)