def gpu_thread_worker(nn, edge_queue, eval_batch_size, is_cuda):
    """Consume evaluation requests from *edge_queue* and batch them through *nn*.

    Runs forever on a dedicated thread: drains up to *eval_batch_size* edges
    from the queue, evaluates their target states in one forward pass, writes
    ``edge.value`` / ``edge.logit`` back onto each edge, and releases the
    parent node's lock once all of its children have been assigned.

    A ``None`` sentinel on the queue marks shutdown: the current batch is
    still processed, then the sentinel's ``task_done`` is acknowledged and the
    thread returns (letting ``edge_queue.join()`` unblock in the producer).

    Args:
        nn: evaluation network; a ``YesPolicy``/``SharedPolicy`` returns
            ``(value, logits)``, any other net returns value only.
        edge_queue: ``queue.Queue`` of edges (or the ``None`` sentinel).
        eval_batch_size: maximum number of edges evaluated per forward pass.
        is_cuda: forwarded to ``states_to_batch_tensor`` to place the batch.
    """
    while True:
        with torch.no_grad():  # inference only — never build autograd graphs here
            nn.eval()
            edges = []
            last_batch = False
            for _ in range(eval_batch_size):
                if edge_queue.empty():
                    break
                try:
                    edge = edge_queue.get_nowait()
                    if edge is None:
                        last_batch = True
                        print(
                            "Sentinel received. GPU will process this batch and terminate afterwards"
                        )
                    else:
                        edges.append(edge)
                except queue.Empty:
                    # another consumer raced us between empty() and get_nowait()
                    pass
            if edges:
                # Batch all pending states into a single forward pass —
                # states_to_batch_tensor is the known bottleneck.
                states = [edge.to_node.checker_state for edge in edges]
                input_tensor = states_to_batch_tensor(states, is_cuda)
                if isinstance(nn, (YesPolicy, SharedPolicy)):
                    # Policy networks return both heads; keep the real logits.
                    # (Bug fix: previously the logits were overwritten with the
                    # values, and value-only nets hit a NameError on logits.)
                    value_tensor, logits_tensor = nn(input_tensor)
                else:
                    # Value-only networks: reuse the value as the logit.
                    value_tensor = nn(input_tensor)
                    logits_tensor = value_tensor
                for edx, edge in enumerate(edges):
                    edge.value = value_tensor[edx, 0]
                    edge.logit = logits_tensor[edx, 0]
                    edge_queue.task_done()
                    edge.from_node.unassigned -= 1
                    if edge.from_node.unassigned == 0:
                        # All children of this node evaluated — let the
                        # tree-search thread proceed past the node's lock.
                        edge.from_node.lock.release()
            else:
                # Nothing queued; back off briefly instead of busy-spinning.
                time.sleep(0.1)
            if last_batch:
                edge_queue.task_done()  # acknowledge the sentinel itself
                print(
                    "Queue task done signal sent. Queue will join. Thread may still be running."
                )
                return
def grad_train_loop(hypers, nn, criterion=None):
    """Train *nn* on freshly generated batches and report per-epoch losses.

    For each epoch: run ``hypers['batch_train']`` training steps (one
    generated batch each, SGD step via ``nn.optim``), then evaluate on
    ``hypers['batch_test']`` generated batches and print mean train/test loss.

    Args:
        hypers: dict with keys ``'epochs'``, ``'batch_train'``,
            ``'batch_test'``, ``'H'``, ``'n'`` (the last two are forwarded to
            ``grad_generate_batch``).
        nn: model exposing ``train()``, ``eval()``, ``zero_grad()`` and an
            attached optimizer at ``nn.optim``.
        criterion: loss function; defaults to mean-reduced MSE. (Kept as a
            ``None`` sentinel rather than instantiating the loss at def time.)
    """
    if criterion is None:
        criterion = torch.nn.MSELoss(reduction='mean')
    for epoch in range(hypers['epochs']):
        nn.train()
        train_losses, test_losses = [], []
        for _ in range(hypers['batch_train']):
            graphs, labels = grad_generate_batch(hypers['H'], hypers['n'])
            loss = grad_eval_batch(nn, graphs, labels, criterion)
            loss.backward()
            train_losses.append(loss.item())
            nn.optim.step()
            nn.zero_grad()
        nn.eval()
        # Evaluation: no gradients needed, so skip autograd bookkeeping.
        with torch.no_grad():
            for _ in range(hypers['batch_test']):
                graphs, labels = grad_generate_batch(hypers['H'], hypers['n'])
                loss = grad_eval_batch(nn, graphs, labels, criterion)
                test_losses.append(loss.item())
        print(
            f"Train loss is {sum(train_losses) / len(train_losses):.4E}.\nTest loss is {sum(test_losses)/len(test_losses):.4E}.\n"
        )
def loadWeights(self, ID):
    """Load the saved model with the given *ID* and return it in eval mode.

    Reads ``data/<ID>_model.pt`` into a fresh ``Neural_Network``.

    Bug fix: previously the loaded network was discarded (no return, no
    assignment to ``self``), making the call a no-op for the caller.

    Args:
        ID: identifier used to build the checkpoint filename.

    Returns:
        The loaded ``Neural_Network`` instance, switched to eval mode.
    """
    nn = Neural_Network()
    # NOTE(review): torch.load unpickles — only load trusted checkpoints.
    nn.load_state_dict(torch.load('data/' + str(ID) + '_model.pt'))
    nn.eval()
    return nn
h = torch.add(score_pool4, upsample_conv7) #1/16 #deconv-2 h = self.upsample_dc1(h) #1/16 -> 1/8 upsample_dc1 = h # 1/8 h = self.score_pool3(pool3) #256 -> 1 score_pool3 = h # 1/8 h = torch.add(upsample_dc1, score_pool3) #1/8 #output #output h = F.pad(h, [0, 0, 0, 1]) h = self.upsample_out(h) h = h[:, :, 3:3 + x.size()[2]].contiguous() return h if __name__ == '__main__': nn = QuadriNet_LESS_Fancy() nn.eval() #crop 405,720 -> 384, 704 t = torch.randn(1, 3, 405, 720) #-> 135*240 x = nn(t) print(x.shape)