def test_index_setitem_bools_slices(self):
    """Exercise ``__setitem__`` with bool, byte-scalar, ``None``, ``...`` and ``:`` keys."""
    byte_one = variable(1).byte()
    byte_zero = variable(0).byte()

    candidates = [Variable(torch.randn(2, 3))]
    if torch._C._with_scalars():
        candidates.append(variable(3))

    for a in candidates:
        # The assigned value is prefixed with two size-1 dims to ensure we
        # are compatible with numpy, which cuts off prefix 1s (some of these
        # ops already prefix a 1 to the size).
        minus_one = torch.ones_like(a) * -1
        padded = minus_one.unsqueeze(0).unsqueeze(0)

        # Python bools: True writes everything, False writes nothing.
        a[True] = padded
        self.assertEqual(a, minus_one)
        a[False] = 5
        self.assertEqual(a, minus_one)

        # Same checks with byte scalars as the index.
        if torch._C._with_scalars():
            a[byte_one] = padded * 2
            self.assertEqual(a, minus_one * 2)
            a[byte_zero] = 5
            self.assertEqual(a, minus_one * 2)

        a[None] = padded * 3
        self.assertEqual(a, minus_one * 3)

        a[...] = padded * 4
        self.assertEqual(a, minus_one * 4)

        # A full slice is rejected on a zero-dim value.
        if a.dim() == 0:
            with self.assertRaises(RuntimeError):
                a[:] = padded * 5
def test_index_setitem_bools_slices(self):
    """Check ``__setitem__`` with bool, byte-scalar, ``None``, ``...`` and ``:`` keys.

    For every sample value the test assigns through each kind of index and
    asserts the resulting contents; "true" indices must overwrite everything
    and "false" indices must leave the value untouched.
    """
    true = variable(1).byte()
    false = variable(0).byte()

    tensors = [Variable(torch.randn(2, 3))]
    if torch._C._with_scalars():
        tensors.append(variable(3))

    for a in tensors:
        # (The unused ``a_clone = a.clone()`` local was removed; nothing read it.)
        # prefix with a 1,1, to ensure we are compatible with numpy which cuts
        # off prefix 1s (some of these ops already prefix a 1 to the size)
        neg_ones = torch.ones_like(a) * -1
        neg_ones_expanded = neg_ones.unsqueeze(0).unsqueeze(0)

        a[True] = neg_ones_expanded
        self.assertEqual(a, neg_ones)
        a[False] = 5
        self.assertEqual(a, neg_ones)

        if torch._C._with_scalars():
            a[true] = neg_ones_expanded * 2
            self.assertEqual(a, neg_ones * 2)
            a[false] = 5
            self.assertEqual(a, neg_ones * 2)

        a[None] = neg_ones_expanded * 3
        self.assertEqual(a, neg_ones * 3)

        a[...] = neg_ones_expanded * 4
        self.assertEqual(a, neg_ones * 4)

        # Slicing a zero-dim value with ``:`` must raise.
        if a.dim() == 0:
            with self.assertRaises(RuntimeError):
                a[:] = neg_ones_expanded * 5
def genrate(enc, dec, inp, decoder_inp, args, input_len, target_len):
    """Encode ``inp`` once, then step the decoder across ``decoder_inp``.

    At every step the decoder is fed the previous column of ``decoder_inp``
    (the first step is fed the constant token id 2). The arg-max of each
    step's ``p_final`` is written to ``preds_summ`` and ``p_final`` itself is
    added into ``preds``.

    Returns:
        Tuple ``(preds_summ, preds, p_final, attn)`` where ``p_final`` and
        ``attn`` are the values produced by the last decoder step.
    """
    # NOTE(review): ``device`` is not a parameter; it is read from the
    # enclosing scope.
    enc_output, state = enc(inp, device)
    # Original author's shape notes: encoder_output=[8,400,512],
    # state=([2,8,512])*2 -- TODO confirm.

    # First decoder input: token id 2 (<sos>) for every row.
    start_tokens = torch.ones(enc_output.size(0), 1, dtype=torch.long)
    dec_inp = start_tokens.to(device) * 2
    coverage = variable(torch.zeros(dec.batch_size, dec.max_enc)).to(device)
    dec_state = dec.hid_init(state, device)
    cov_loss = 0

    steps = decoder_inp.size(1)
    preds_summ = variable(torch.zeros(args['batch'], steps),
                          requires_grad=True)
    preds_shape = (args['batch'], target_len,
                   args['vocab_size'] + args['max_oovs'])
    preds = variable(torch.zeros(preds_shape),
                     requires_grad=False).contiguous().to(device)

    for step in range(steps):
        dec_state, p_final, coverage, attn, p_gen = dec(
            enc_output, dec_inp, inp, dec_state, coverage, input_len,
            target_len)
        # Next input is column ``step`` of the provided decoder input,
        # reshaped to a column.
        dec_inp = decoder_inp[:, step].unsqueeze(0).transpose(1, 0)

        pred = torch.argmax(p_final, 1)
        for row, token in enumerate(pred):
            preds_summ[row, step] = token
        preds[:, step, :] += p_final

    del cov_loss, dec_state, coverage, p_gen
    return (preds_summ, preds, p_final, attn)
def test_index_getitem_copy_bools_slices(self):
    """Compare data pointers and results of bool / byte / ``None`` / ``...`` indexing."""
    byte_one = variable(1).byte()
    byte_zero = variable(0).byte()

    for a in [Variable(torch.randn(2, 3)), variable(3)]:
        base_ptr = a.data_ptr()

        # "True"-like indices must give a different data pointer; "False"-like
        # indices must give an empty result.
        self.assertNotEqual(base_ptr, a[True].data_ptr())
        self.assertEqual(variable([]), a[False])
        self.assertNotEqual(base_ptr, a[byte_one].data_ptr())
        self.assertEqual(variable([]), a[byte_zero])

        # ``None`` and ``...`` must keep the same data pointer.
        self.assertEqual(base_ptr, a[None].data_ptr())
        self.assertEqual(base_ptr, a[...].data_ptr())
def test_index_getitem_copy_bools_slices(self):
    """Compare data pointers and results of bool / byte / ``None`` / ``...`` indexing."""
    byte_one = variable(1).byte()
    byte_zero = variable(0).byte()

    for a in [Variable(torch.randn(2, 3)), variable(3)]:
        base_ptr = a.data_ptr()

        # "True"-like indices must give a different data pointer; "False"-like
        # indices must give an empty result.
        self.assertNotEqual(base_ptr, a[True].data_ptr())
        self.assertEqual(variable([]), a[False])
        self.assertNotEqual(base_ptr, a[byte_one].data_ptr())
        self.assertEqual(variable([]), a[byte_zero])

        # ``None`` and ``...`` must keep the same data pointer.
        self.assertEqual(base_ptr, a[None].data_ptr())
        self.assertEqual(base_ptr, a[...].data_ptr())
def test_zero_dim_index(self):
    """Indexing a zero-dim value with ``0`` must equal the value and record one warning."""
    # Zero-dim tensors can temporarily be indexed as if they were one-dim
    # tensors, to better maintain backwards compatibility.
    scalar = variable(10)
    with warnings.catch_warnings(record=True) as caught:
        self.assertEqual(scalar, scalar[0])
        self.assertEqual(len(caught), 1)
def dataframe_to_variable_float(param):
    """Convert ``param.values`` to a long-typed variable.

    NOTE(review): despite the ``_float`` suffix in the name, the result is
    cast with ``.long()``.
    """
    raw = torch.from_numpy(param.values)
    return variable(raw).long()
def test_zero_dim_index(self):
    """Indexing a zero-dim value with ``0`` must equal the value and record one warning."""
    # Zero-dim tensors can temporarily be indexed as if they were one-dim
    # tensors, to better maintain backwards compatibility.
    scalar = variable(10)
    with warnings.catch_warnings(record=True) as caught:
        self.assertEqual(scalar, scalar[0])
        self.assertEqual(len(caught), 1)
def build_and_train_netCOS(hidden1, hidden2, max_iterations, min_error,
                           all_queries, all_imgs, batch_size):
    """Train an ``NLR2`` model with a ``1 - cosine similarity`` loss using SGD.

    Args:
        hidden1, hidden2: Passed through to ``NLR2`` as its last two arguments.
        max_iterations: Maximum number of epochs.
        min_error: Training stops once an epoch's summed loss drops below this.
        all_queries: Model inputs; converted with ``torch.Tensor``.
        all_imgs: Targets; converted with ``torch.tensor``.
        batch_size: Number of rows per mini-batch. Rows past the last full
            batch are not used for training.

    Side effects:
        Saves a checkpoint every 1000 epochs and at the end under ``Path1``,
        and pickles the per-epoch loss totals to ``loosses2.pkl``.

    Changes from the previous version:
        * Batch slices used ``(l + 1) * batch_size - 1`` as the end index and
          therefore dropped the last row of every batch.
        * Losses are accumulated as Python floats (``loss.item()``) instead of
          graph-attached tensors, and the never-read ``losses`` list is gone,
          so autograd graphs are no longer kept alive across iterations.
        * The unused ``criterion`` and the duplicate seed call were removed.
    """
    all_queries = Variable(torch.Tensor(all_queries))
    all_imgs = variable(torch.tensor(all_imgs))
    model = NLR2(all_queries.shape[1], all_imgs.shape[1], hidden1, hidden2)
    torch.manual_seed(3)
    loss_fn = torch.nn.CosineSimilarity(dim=1, eps=1e-6)
    optimizer = torch.optim.SGD(model.parameters(), lr=0.002)

    totallosses = []
    num_batches = all_queries.shape[0] // batch_size
    for j in range(max_iterations):
        total_loss = 0.0
        for l in range(num_batches):
            start, stop = l * batch_size, (l + 1) * batch_size
            item_batch = all_queries[start:stop, :]
            target_batch = all_imgs[start:stop, :]
            netoutbatch = model.myforward(item_batch)
            # Mean distance from perfect similarity (cosine == 1).
            loss = torch.mean(
                torch.abs(1 - loss_fn(target_batch, netoutbatch)))
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
            if l % 1000 == 0:
                print('Epoch:', j, ' get images batch=', start, ':', stop,
                      'loss', loss, end='\r')
        # NOTE(review): the early-stop check is applied once per epoch.
        if total_loss < min_error:
            break
        print('iteration:', j, 'total loss', total_loss)
        totallosses.append(total_loss)
        if j % 1000 == 0:
            torch.save(model.state_dict(),
                       Path1 + r'\NLPCOS172K' + str(j) + '.pth')

    print('mean square loss', loss_fn(model.myforward(all_queries), all_imgs))
    print('Finished Training')
    with open(Path1 + r"/" + 'loosses2.pkl', 'wb') as fp:
        pickle.dump(totallosses, fp)
    torch.save(model.state_dict(), Path1 + r'\NLPCOSfinal172k.pth')
def __init__(self, concentration1, concentration0):
    """Build the underlying ``Dirichlet`` from the two concentration parameters.

    Two plain numbers are packed directly into one variable; anything else is
    first broadcast with ``broadcast_all`` and then stacked along a new last
    dimension.
    """
    both_numbers = (isinstance(concentration1, Number)
                    and isinstance(concentration0, Number))
    if not both_numbers:
        concentration1, concentration0 = broadcast_all(concentration1,
                                                       concentration0)
        stacked = torch.stack([concentration1, concentration0], -1)
    else:
        stacked = variable([concentration1, concentration0])
    self._dirichlet = Dirichlet(stacked)
    super(Beta, self).__init__(self._dirichlet._batch_shape)
def test_getitem_scalars(self):
    """Indexing with long scalars must match indexing with Python ints."""
    idx0 = variable(0).long()
    idx1 = variable(1).long()

    # A 2-D value indexed with scalar indices.
    matrix = Variable(torch.randn(2, 3))
    self.assertEqual(matrix[0], matrix[idx0])
    self.assertEqual(matrix[0][1], matrix[idx0][idx1])
    self.assertEqual(matrix[0, 1], matrix[idx0, idx1])
    self.assertEqual(matrix[0, idx1], matrix[idx0, 1])

    # A scalar indexed with a slice / scalar / ellipsis.
    r = variable(0).normal_()
    with self.assertRaises(RuntimeError):
        r[:]
    with self.assertRaises(IndexError):
        r[idx0]
    self.assertEqual(r, r[...])
def _infinite_like(tensor):
    """
    Return positive infinity expanded to the shape of ``tensor``; helper for
    obtaining an infinite KL divergence throughout.
    """
    # Two construction paths because the Variable and Tensor APIs differ and
    # dtypes are not available.
    if not isinstance(tensor, Variable):
        inf = tensor.new([float('inf')])
    else:
        inf = variable(float('inf')).type_as(tensor)
    return inf.expand_as(tensor)
def test_setitem_expansion_error(self):
    """Assigning a value whose extra leading dims are not all 1 must raise."""
    byte_one = variable(1).byte()
    a = Variable(torch.randn(2, 3))

    # Prefix the shape with (5, 1): a non-1 prefix dimension must not work.
    oversized = a.expand(torch.Size([5, 1]) + a.size())

    with self.assertRaises(RuntimeError):
        a[True] = oversized
    with self.assertRaises(RuntimeError):
        a[byte_one] = torch.autograd.Variable(oversized)
def test_setitem_expansion_error(self):
    """Assigning a value whose extra leading dims are not all 1 must raise."""
    byte_one = variable(1).byte()
    a = Variable(torch.randn(2, 3))

    # Prefix the shape with (5, 1): a non-1 prefix dimension must not work.
    oversized = a.expand(torch.Size([5, 1]) + a.size())

    with self.assertRaises(RuntimeError):
        a[True] = oversized
    with self.assertRaises(RuntimeError):
        a[byte_one] = torch.autograd.Variable(oversized)
def test_getitem_scalars(self):
    """Indexing with long scalars must match indexing with Python ints."""
    idx0 = variable(0).long()
    idx1 = variable(1).long()

    # A 2-D value indexed with scalar indices.
    matrix = Variable(torch.randn(2, 3))
    self.assertEqual(matrix[0], matrix[idx0])
    self.assertEqual(matrix[0][1], matrix[idx0][idx1])
    self.assertEqual(matrix[0, 1], matrix[idx0, idx1])
    self.assertEqual(matrix[0, idx1], matrix[idx0, 1])

    # A scalar indexed with a slice / scalar / ellipsis.
    r = variable(0).normal_()
    with self.assertRaises(RuntimeError):
        r[:]
    with self.assertRaises(RuntimeError):
        r[idx0]
    self.assertEqual(r, r[...])
def _infinite_like(tensor):
    """
    Return positive infinity expanded to the shape of ``tensor``; helper for
    obtaining an infinite KL divergence throughout.
    """
    # Two construction paths because the Variable and Tensor APIs differ and
    # dtypes are not available.
    if not isinstance(tensor, Variable):
        inf = tensor.new([float('inf')])
    else:
        inf = variable(float('inf')).type_as(tensor)
    return inf.expand_as(tensor)
def forward(self, output_stepnumber, hidden_states):
    """Run ``self.layers`` on an all-zero input sequence.

    Args:
        output_stepnumber: Size of the leading dimension of the zero input.
        hidden_states: Initial hidden states; must not be ``None``. The
            remaining four input dimensions are copied from the size of
            ``hidden_states[0][0]``.

    Returns:
        ``(output, all_hidden_states)`` exactly as returned by ``self.layers``.
    """
    # Identity check instead of ``!= None``.
    assert hidden_states is not None
    state_size = hidden_states[0][0].size()
    # ``torch.zeros`` already returns a fresh tensor, so the extra
    # ``torch.tensor(...)`` copy is not needed. The local is no longer named
    # ``input`` to avoid shadowing the builtin.
    zero_input = variable(
        torch.zeros(output_stepnumber, state_size[0], state_size[1],
                    state_size[2], state_size[3]))
    output, all_hidden_states = self.layers(zero_input, hidden_states)
    return output, all_hidden_states
def __init__(self, concentration1, concentration0):
    """Build the underlying ``Dirichlet`` from the two concentration parameters.

    Two plain numbers are packed directly into one variable; anything else is
    first broadcast with ``broadcast_all`` and then stacked along a new last
    dimension.
    """
    both_numbers = (isinstance(concentration1, Number)
                    and isinstance(concentration0, Number))
    if not both_numbers:
        concentration1, concentration0 = broadcast_all(concentration1,
                                                       concentration0)
        stacked = torch.stack([concentration1, concentration0], -1)
    else:
        stacked = variable([concentration1, concentration0])
    self._dirichlet = Dirichlet(stacked)
    super(Beta, self).__init__(self._dirichlet._batch_shape)
def test_ellipsis_index(self):
    """Check reads and writes through ``...`` (Ellipsis) indices."""
    a = tensor([[1, 2, 3],
                [4, 5, 6],
                [7, 8, 9]])

    # ``a[...]`` is a distinct object that compares equal to ``a``
    # (`a[...]` was `a` in numpy <1.9) and has the same data pointer.
    self.assertIsNot(a[...], a)
    self.assertEqual(a[...], a)
    self.assertEqual(a[...].data_ptr(), a.data_ptr())

    # An ellipsis can stand in for an arbitrary number of dimensions.
    self.assertEqual(a[0, ...], a[0])
    self.assertEqual(a[0, ...], a[0, :])
    self.assertEqual(a[..., 0], a[:, 0])

    # In NumPy, slicing with ellipsis results in a 0-dim array. In PyTorch
    # we don't have separate 0-dim arrays and scalars.
    self.assertEqual(a[0, ..., 1], variable(2))

    # Assignment through ``(Ellipsis,)`` on a 0-d value.
    scalar = variable(1)
    scalar[(Ellipsis,)] = 2
    self.assertEqual(scalar, 2)
def test_setitem_scalars(self):
    """Assignment through long-scalar indices must match assignment through ints."""
    idx0 = variable(0).long()

    # A 2-D value indexed with scalar indices.
    a = Variable(torch.randn(2, 3))
    via_number = a.clone()
    via_scalar = a.clone()
    row = Variable(torch.randn(3))

    via_number[0] = row
    via_scalar[idx0] = row
    self.assertEqual(via_number, via_scalar)

    a[1, idx0] = 7.7
    self.assertEqual(7.7, a[1, 0])

    # A scalar indexed with a slice / scalar / ellipsis.
    r = variable(0).normal_()
    with self.assertRaises(RuntimeError):
        r[:] = 8.8
    with self.assertRaises(RuntimeError):
        r[idx0] = 8.8
    r[...] = 9.9
    self.assertEqual(9.9, r)
def test_setitem_scalars(self):
    """Assignment through long-scalar indices must match assignment through ints."""
    idx0 = variable(0).long()

    # A 2-D value indexed with scalar indices.
    a = Variable(torch.randn(2, 3))
    via_number = a.clone()
    via_scalar = a.clone()
    row = Variable(torch.randn(3))

    via_number[0] = row
    via_scalar[idx0] = row
    self.assertEqual(via_number, via_scalar)

    a[1, idx0] = 7.7
    self.assertEqual(7.7, a[1, 0])

    # A scalar indexed with a slice / scalar / ellipsis.
    r = variable(0).normal_()
    with self.assertRaises(RuntimeError):
        r[:] = 8.8
    with self.assertRaises(IndexError):
        r[idx0] = 8.8
    r[...] = 9.9
    self.assertEqual(9.9, r)
def broadcast_all(*values):
    r"""
    Broadcast a mix of numbers and tensors/Variables against each other.

    - Tensor and Variable arguments are expanded to the shape computed by
      ``_broadcast_shape`` from all tensor-like arguments.
    - ``numbers.Number`` arguments are replaced by a new value built from the
      first tensor-like argument (same size, filled with the number). When
      no tensor-like argument is given, each number is wrapped with
      ``variable``.

    Args:
        values (list of `numbers.Number`, `torch.autograd.Variable` or
            `torch.Tensor`)

    Returns:
        list with one entry per input, in the same order.

    Raises:
        ValueError: if any value is not a `numbers.Number`, `torch.Tensor`
            or `torch.autograd.Variable` instance, or if numbers are mixed
            with values that are not `torch.autograd.Variable`.
    """
    values = list(values)
    scalar_idxs = [i for i, v in enumerate(values) if isinstance(v, Number)]
    tensor_idxs = [i for i, v in enumerate(values)
                   if torch.is_tensor(v) or isinstance(v, Variable)]
    if len(scalar_idxs) + len(tensor_idxs) != len(values):
        raise ValueError('Input arguments must all be instances of numbers.Number, torch.Tensor or ' +
                         'torch.autograd.Variable.')

    # Only numbers (or nothing at all): wrap each one and finish early.
    if not tensor_idxs:
        for idx in scalar_idxs:
            values[idx] = variable(values[idx])
        return values

    broadcast_shape = _broadcast_shape([values[i].size() for i in tensor_idxs])
    for idx in tensor_idxs:
        values[idx] = values[idx].expand(broadcast_shape)

    # The first tensor-like argument is the template for upcasting numbers.
    template = values[tensor_idxs[0]]
    if len(scalar_idxs) > 0 and not isinstance(template, torch.autograd.Variable):
        raise ValueError(('Input arguments containing instances of numbers.Number and torch.Tensor '
                          'are not currently supported.  Use torch.autograd.Variable instead of torch.Tensor'))
    for idx in scalar_idxs:
        values[idx] = template.new(template.size()).fill_(values[idx])
    return values
def build_and_train_netMSE(hidden1, hidden2, max_iterations, min_error,
                           all_queries, all_imgs, batch_size):
    """Train an ``NLR2`` model with a mean-squared-error loss using SGD.

    Args:
        hidden1, hidden2: Passed through to ``NLR2`` as its last two arguments.
        max_iterations: Maximum number of epochs.
        min_error: Training stops once an epoch's summed loss drops below this.
        all_queries: Model inputs; converted with ``torch.Tensor``.
        all_imgs: Targets; converted with ``torch.tensor``.
        batch_size: Number of rows per mini-batch. Rows past the last full
            batch are not used for training.

    Side effects:
        Saves the final ``state_dict`` under ``Path1``.

    Changes from the previous version:
        * Batch slices used ``(l + 1) * batch_size - 1`` as the end index and
          therefore dropped the last row of every batch.
        * The final evaluation compared the model output against
          ``all_queries`` (the inputs) instead of the targets ``all_imgs``.
        * Losses are accumulated as Python floats (``loss.item()``) and the
          never-read ``losses`` / ``totallosses`` bookkeeping no longer keeps
          autograd graphs alive.
        * The unused ``criterion`` and the duplicate seed call were removed.
    """
    all_queries = Variable(torch.Tensor(all_queries))
    all_imgs = variable(torch.tensor(all_imgs))
    model = NLR2(all_queries.shape[1], all_imgs.shape[1], hidden1, hidden2)
    torch.manual_seed(3)
    loss_fn = torch.nn.MSELoss()
    optimizer = torch.optim.SGD(model.parameters(), lr=0.001)

    totallosses = []
    num_batches = all_queries.shape[0] // batch_size
    for j in range(max_iterations):
        total_loss = 0.0
        for l in range(num_batches):
            start, stop = l * batch_size, (l + 1) * batch_size
            item_batch = all_queries[start:stop, :]
            target_batch = all_imgs[start:stop, :]
            netoutbatch = model.myforward(item_batch)
            loss = loss_fn(target_batch, netoutbatch)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
            if l % 1000 == 0:
                print('Epoch:', j, ' get images batch=', start, ':', stop,
                      'loss', loss, end='\r')
        # NOTE(review): the early-stop check is applied once per epoch.
        if total_loss < min_error:
            break
        print('iteration:', j, 'total loss', total_loss)
        totallosses.append(total_loss)

    print('mean square loss', loss_fn(model.myforward(all_queries), all_imgs))
    print('Finished Training')
    torch.save(model.state_dict(), Path1 + r'\NLPMSEt.pth')
def detect(frame, net, transform):
    """Run ``net`` on one frame and draw every detection scoring >= 0.5.

    Args:
        frame: Image array; its first two ``shape`` entries are read as
            ``(height, width)``. It is drawn on in place.
        net: Callable model; its output's ``.data`` is indexed as
            ``[0, class, candidate, (score, x0, y0, x1, y1)]``.
        transform: Callable whose first return element is the network input
            array (presumably H x W x C, since it is permuted with
            ``(2, 0, 1)`` -- TODO confirm).

    Returns:
        The same ``frame`` with rectangles and labels drawn on it.
    """
    height, width = frame.shape[:2]
    frame_t = transform(frame)[0]
    x = torch.from_numpy(frame_t).permute(2, 0, 1)
    x = variable(x.unsqueeze(0))
    y = net(x)
    detections = y.data
    # Box coordinates are multiplied by this to obtain pixel positions.
    scale = torch.Tensor([width, height, width, height])
    num_candidates = detections.size(2)
    for i in range(detections.size(1)):
        j = 0
        # Bound ``j`` so that a class whose candidates all score >= 0.5 does
        # not index past the end of the candidate dimension.
        while j < num_candidates and detections[0, i, j, 0] >= 0.5:
            pt = (detections[0, i, j, 1:] * scale).numpy()
            top_left = (int(pt[0]), int(pt[1]))
            bottom_right = (int(pt[2]), int(pt[3]))
            cv2.rectangle(frame, top_left, bottom_right, (255, 0, 0), 2)
            cv2.putText(frame, labelmap[i - 1], top_left,
                        cv2.FONT_HERSHEY_SIMPLEX, 2, (255, 255, 255), 2,
                        cv2.LINE_AA)
            j += 1
    return frame
def forward(self, inp):
    """Embed ``inp``, run the RNN from zero states and classify the last step.

    Args:
        inp: Input whose second dimension is taken as the batch size.

    Returns:
        ``log_softmax`` (over dim 1) of ``self.fc2`` applied to the last RNN
        output, after dropout with ``p=0.8``.

    Changes from the previous version:
        * Dropout now follows ``self.training`` (when that attribute exists).
          ``F.dropout`` defaults to ``training=True``, so it was also active
          during evaluation.
        * The statements that followed the ``return`` were unreachable and
          referenced an undefined ``x``; they have been removed.
    """
    bs = inp.size()[1]
    # Track the current batch size so the initial states match the input.
    if bs != self.bs:
        self.bs = bs
    e_out = self.e(inp)
    # The hidden and cell states share one zero tensor, allocated with
    # ``e_out.data.new`` so it has the same type as the embeddings.
    h0 = c0 = variable(
        e_out.data.new(*(self.nl, self.bs, self.hidden_size)).zero_())
    rnn_o, _ = self.rnn(e_out, (h0, c0))
    rnn_o = rnn_o[-1]
    fc = F.dropout(self.fc2(rnn_o), p=0.8,
                   training=getattr(self, 'training', True))
    return F.log_softmax(fc, dim=1)
def broadcast_all(*values):
    r"""
    Broadcast a mix of numbers and tensors/Variables against each other.

    - Tensor and Variable arguments are expanded to the shape computed by
      ``_broadcast_shape`` from all tensor-like arguments.
    - ``numbers.Number`` arguments are replaced by a new value built from the
      first tensor-like argument (same size, filled with the number). When
      no tensor-like argument is given, each number is wrapped with
      ``variable``.

    Args:
        values (list of `numbers.Number`, `torch.autograd.Variable` or
            `torch.Tensor`)

    Returns:
        list with one entry per input, in the same order.

    Raises:
        ValueError: if any value is not a `numbers.Number`, `torch.Tensor`
            or `torch.autograd.Variable` instance, or if numbers are mixed
            with values that are not `torch.autograd.Variable`.
    """
    values = list(values)
    scalar_idxs = [i for i, v in enumerate(values) if isinstance(v, Number)]
    tensor_idxs = [i for i, v in enumerate(values)
                   if torch.is_tensor(v) or isinstance(v, Variable)]
    if len(scalar_idxs) + len(tensor_idxs) != len(values):
        raise ValueError('Input arguments must all be instances of numbers.Number, torch.Tensor or ' +
                         'torch.autograd.Variable.')

    # Only numbers (or nothing at all): wrap each one and finish early.
    if not tensor_idxs:
        for idx in scalar_idxs:
            values[idx] = variable(values[idx])
        return values

    broadcast_shape = _broadcast_shape([values[i].size() for i in tensor_idxs])
    for idx in tensor_idxs:
        values[idx] = values[idx].expand(broadcast_shape)

    # The first tensor-like argument is the template for upcasting numbers.
    template = values[tensor_idxs[0]]
    if len(scalar_idxs) > 0 and not isinstance(template, torch.autograd.Variable):
        raise ValueError(('Input arguments containing instances of numbers.Number and torch.Tensor '
                          'are not currently supported.  Use torch.autograd.Variable instead of torch.Tensor'))
    for idx in scalar_idxs:
        values[idx] = template.new(template.size()).fill_(values[idx])
    return values
import matplotlib.pyplot as plt
import pylab
# ``torch.autograd.variable`` (lowercase) is a submodule on released PyTorch
# versions and cannot be called; the wrapper class is ``Variable``.
from torch.autograd import Variable

# Problem sizes: samples per batch, hidden width, input and output dimensions.
batch_n = 64
hidden_layer = 100
input_data = 1000
output_data = 10

# Two-layer fully connected network with a ReLU in between.
models = torch.nn.Sequential(torch.nn.Linear(input_data, hidden_layer),
                             torch.nn.ReLU(),
                             torch.nn.Linear(hidden_layer, output_data))

epoch_n = 10
learning_rate = 1e-3
loss_fn = torch.nn.MSELoss()

# Random inputs and random regression targets.
x = Variable(torch.randn(batch_n, input_data))
y = Variable(torch.randn(batch_n, output_data))

optimzer = torch.optim.Adam(models.parameters(), lr=learning_rate)

for epoch in range(epoch_n):
    y_pred = models(x)
    loss = loss_fn(y_pred, y)
    print("epoch:{}, loss:{}".format(epoch, loss.data))
    # Clear old gradients, back-propagate, then apply the Adam update.
    optimzer.zero_grad()
    loss.backward()
    optimzer.step()
return output / dim loss_reference_fns = { 'KLDivLoss': kldivloss_reference, 'NLLLoss': nllloss_reference, 'NLLLossNd': nlllossNd_reference, 'SmoothL1Loss': smoothl1loss_reference, 'MultiLabelMarginLoss': multilabelmarginloss_reference, 'HingeEmbeddingLoss': hingeembeddingloss_reference, 'SoftMarginLoss': softmarginloss_reference, 'MultiMarginLoss': multimarginloss_reference, } sample_scalar = variable(0) # TODO: replace this with torch.rand() when Variables and tensors are merged; # this function will correctly handle scalars (i.e. empty tuple sizes) for now. def torch_rand(sizes, requires_grad=False): if len(sizes) == 0: return torch.testing.rand_like(sample_scalar, requires_grad=requires_grad) else: return Variable(torch.rand(*sizes), requires_grad=requires_grad) # TODO: replace this with torch.randn() when Variables and tensors are merged; # this function will correctly handle scalars (i.e. empty tuple sizes) for now. def torch_randn(sizes, requires_grad=False): if len(sizes) == 0:
import torch
# ``torch.autograd.variable`` (lowercase) is a submodule on released PyTorch
# versions and cannot be called. ``Variable`` is the legacy wrapper class;
# ``torch.tensor`` does the same job now.
from torch.autograd import Variable

data = [1.0, 2, 26, 28, 4]

# ``Variable`` wraps a tensor, not a Python list, so convert the list first.
b = Variable(torch.tensor(data))
print('the variable is', b)

# A tensor created with requires_grad=True is tracked by autograd.
c = torch.tensor(data, requires_grad=True)
print('c is torch tesnor', c)
print('cdata is', c.data)

'''# print("the data of tensor CC is ", c.data)
print("the data of tensor B is ", b.data[:])
e=c.data[3]
print(float(e))#in order to conver one elemet from tensor to scaler we can use float(val) ot int(val) :val=tensor
print('eitem is',e.item())#or simply use item () to for convering into scaler
f= c.tolist()#conver tesnor list to common list
print('list conversion',f)
g=c.numpy()#conver tensor to common array list
print("numpy conversion",g)'''

# ``c.grad`` is printed before any backward pass has been run.
print(c.grad)

# ***************************************************************
# Understanding the gradient in PyTorch:
# f(x) = 4x + 2
# df/dx = 4 (the derivative of 4x + 2 is 4)
def init_weights(self):
    """Initialise the weights of every Conv2d, BatchNorm2d and Linear sub-module."""
    for module in self.modules():
        if isinstance(module, nn.Conv2d):
            # Xavier-normal weights; zero bias when the layer has one.
            nn.init.xavier_normal_(module.weight)
            if module.bias is not None:
                nn.init.zeros_(module.bias)
            continue
        if isinstance(module, nn.BatchNorm2d):
            # Weight of ones and bias of zeros.
            nn.init.ones_(module.weight)
            nn.init.zeros_(module.bias)
            continue
        if isinstance(module, nn.Linear):
            # Normal(0, 0.01) weights and zero bias.
            nn.init.normal_(module.weight, 0, 0.01)
            nn.init.zeros_(module.bias)


def mobilenet_v2(pretrained=False, **kwargs):
    """Construct a ``MobileNetV2`` from ``kwargs``.

    NOTE(review): ``pretrained`` is accepted but not used here.
    """
    return MobileNetV2(**kwargs)


if __name__ == "__main__":
    # Smoke test: push a random batch through the network on the GPU and
    # print the size of every output.
    x = variable(torch.randn(10, 3, 300, 300))
    net = MobileNetV2().cuda()
    net.init_weights()
    output = net(x.cuda())
    print("-----------------------------------")
    for single_output in output:
        print(single_output.size())
def generateBatchInput(self, corpus, corpusMeta, batchSize):
    """Yield the model inputs for ``corpus``, one batch at a time.

    Full batches of ``batchSize`` utterances are yielded first (with a tqdm
    progress bar); if utterances remain, one final smaller batch follows.

    Args:
        corpus: Object with ``prepared`` and ``utterances``; it is passed to
            ``self.prepareTraining`` first when not yet prepared.
        corpusMeta: Provides the vocabulary mappings and ``unk``.
        batchSize: Number of utterances per full batch.

    Yields:
        list: ``[wordSeqTensor, tagSeqTensor, wordSeqLengths, charSeqTensor,
        charSeqLengths, seq2NodeTensor, node2SeqTensor, adjMatrixTensor,
        gazNode2Idxs, gazNodeLengths, nodeNums, gazBlankState,
        fwbigramTensor, bwbigramTensor]``.

    Changes from the previous version:
        * The batch-building code was duplicated verbatim for the tail batch;
          both paths now go through ``_buildBatchTensors``.
        * ``gazNodeLengths`` was created with ``autograd.variable`` (lowercase),
          unlike every other tensor here; it now uses ``autograd.Variable``.
    """
    if not corpus.prepared:
        self.prepareTraining(corpus, corpusMeta)

    totalSize = len(corpus.utterances)
    numFullBatches = totalSize // batchSize
    for batchId in tqdm(range(numFullBatches),
                        disable=GLOBAL_VARIABLE.DISABLE_TQDM):
        batchUtts = corpus.utterances[batchId * batchSize:
                                      (batchId + 1) * batchSize]
        yield self._buildBatchTensors(batchUtts, corpusMeta)

    # Remaining utterances that do not fill a whole batch.
    startId = numFullBatches * batchSize
    if startId < totalSize:
        batchUtts = corpus.utterances[startId:totalSize]
        yield self._buildBatchTensors(batchUtts, corpusMeta)


def _buildBatchTensors(self, batchUtts, corpusMeta):
    """Build the padded index tensors and adjacency matrix for one batch."""
    word2Idx = corpusMeta.word2idx
    tag2Idx = corpusMeta.tag2Idx
    char2Idx = corpusMeta.char2Idx
    fwbigram2idx = corpusMeta.fwbigram2idx
    bwbigram2idx = corpusMeta.bwbigram2idx

    batchSize = len(batchUtts)
    wordSeqLengths = torch.LongTensor(
        list(map(lambda utt: len(utt.tokens), batchUtts)))
    nodeNums = torch.LongTensor(batchSize)
    maxSeqLength = wordSeqLengths.max()

    if self.useChar:
        # Per-token character counts; padding positions count as length 1.
        charSeqLengths = torch.LongTensor([
            list(map(lambda tok: len(tok.chars), utt.tokens)) +
            [1] * (int(maxSeqLength) - len(utt.tokens))
            for utt in batchUtts
        ])
        maxCharLength = charSeqLengths.max()
        charSeqTensor = autograd.Variable(
            torch.zeros((batchSize, maxSeqLength, maxCharLength))).long()
    else:
        charSeqLengths = None
        charSeqTensor = None

    wordSeqTensor = autograd.Variable(
        torch.zeros((batchSize, maxSeqLength))).long()
    if self.useBigram:
        fwbigramTensor = autograd.Variable(
            torch.zeros([batchSize, maxSeqLength])).long()
        bwbigramTensor = autograd.Variable(
            torch.zeros([batchSize, maxSeqLength])).long()
    else:
        fwbigramTensor = None
        bwbigramTensor = None
    tagSeqTensor = autograd.Variable(
        torch.zeros((batchSize, maxSeqLength))).long()
    seq2NodeTensor = autograd.Variable(
        torch.zeros([batchSize, maxSeqLength, 1])).long()
    node2SeqTensor = autograd.Variable(
        torch.zeros([batchSize, maxSeqLength, 1])).long()

    gazNodeLengths = []
    gazNode2Idxs = []
    maxTotalNode = max([utt.totalNode for utt in batchUtts])
    maxMainNode = max([utt.mainNode for utt in batchUtts])
    for gazIdx in range(self.gaNum):
        batchMaxNodeLength = max(
            [utt.gazGraph[gazIdx][0] for utt in batchUtts])
        gazNode2Idxs.append(
            autograd.Variable(
                torch.zeros([batchSize, batchMaxNodeLength])).long())
        gazNodeLengths.append(
            autograd.Variable(torch.zeros([batchSize])).long())
    if self.gaNum > 0:
        gazBlankState = torch.zeros([batchSize, maxTotalNode - maxMainNode])
    else:
        gazBlankState = None

    # Main graph: node counts and the node <-> sequence position mappings.
    mainEdges = []
    for idx in range(batchSize):
        nNode, node2seq, seq2node, edges = batchUtts[idx].mainGraph
        nodeNums[idx] = nNode
        node2SeqTensor[idx, :nNode, 0] = torch.LongTensor(node2seq)
        node2SeqTensor[idx, nNode:, 0] = maxSeqLength - 1
        seq2NodeTensor[idx, :wordSeqLengths[idx], 0] = \
            torch.LongTensor(seq2node)
        mainEdges.append(edges)

    # One block of ``maxTotalNode`` columns per edge type; main-graph types
    # come first, followed by the types of each gazetteer graph.
    adjMatrixTensor = autograd.Variable(
        torch.zeros(
            [batchSize, maxTotalNode, maxTotalNode * self.edgeTypes]))
    for idx in range(batchSize):
        tokens = batchUtts[idx].tokens
        seqLength = wordSeqLengths[idx]
        mainTypes = len(mainEdges[idx])
        for typeIdx in range(len(mainEdges[idx])):
            for edge in mainEdges[idx][typeIdx]:
                adjMatrixTensor[idx, edge[0],
                                maxTotalNode * typeIdx + edge[1]] = edge[2]
        for gazIdx in range(self.gaNum):
            nNode, node2idx, edges = batchUtts[idx].gazGraph[gazIdx]
            gazNodeLengths[gazIdx][idx] = nNode
            gazNode2Idxs[gazIdx][idx][:nNode] = torch.LongTensor(node2idx)
            for typeIdx in range(len(edges)):
                for edge in edges[typeIdx]:
                    adjMatrixTensor[idx, edge[0],
                                    maxTotalNode * (mainTypes + typeIdx) +
                                    edge[1]] = edge[2]

        wordSeqTensor[idx, :seqLength] = torch.LongTensor(
            [word2Idx.get(word.text, corpusMeta.unk) for word in tokens])
        tagSeqTensor[idx, :seqLength] = torch.LongTensor(
            [tag2Idx[word.tag] for word in tokens])
        if self.useBigram:
            fwbigramTensor[idx, :seqLength] = torch.LongTensor([
                fwbigram2idx.get(word.fwbigram, corpusMeta.unk)
                for word in tokens
            ])
            bwbigramTensor[idx, :seqLength] = torch.LongTensor([
                bwbigram2idx.get(word.bwbigram, corpusMeta.unk)
                for word in tokens
            ])
        if self.useChar:
            for wordIdx in range(seqLength):
                charSeqTensor[idx, wordIdx,
                              :charSeqLengths[idx, wordIdx]] = \
                    torch.LongTensor([
                        char2Idx.get(char, corpusMeta.unk)
                        for char in tokens[wordIdx].chars
                    ])
            # Padding word positions get a single <PAD> character.
            for wordIdx in range(seqLength, maxSeqLength):
                charSeqTensor[idx, wordIdx, 0:1] = torch.LongTensor(
                    [char2Idx['<PAD>']])

    return [
        wordSeqTensor, tagSeqTensor, wordSeqLengths, charSeqTensor,
        charSeqLengths, seq2NodeTensor, node2SeqTensor, adjMatrixTensor,
        gazNode2Idxs, gazNodeLengths, nodeNums, gazBlankState,
        fwbigramTensor, bwbigramTensor
    ]
# Per-epoch history lists (presumably used for plotting later -- the code
# that fills them is not visible here).
epoch_plt = []
train_acc_plt = []
test_acc_plt = []
start_time = datetime.datetime.now()
for epoch in range(n_epochs):
    # Running totals for the current epoch.
    running_loss = 0.0
    running_correct = 0.0
    print('Epoch {} / {}'.format(epoch, n_epochs))
    print('-' * 10)
    i = 0
    for data in data_loader_train:
        #print('i:{}'.format(i))
        x_train, y_train = data
        # Wrap the batch and move it to the GPU when CUDA is enabled.
        if is_cuda:
            x_train, y_train = variable(x_train).cuda(), variable(
                y_train).cuda()
        else:
            x_train, y_train = variable(x_train), variable(y_train)
        # print(x_train.shape)
        outputs = model(x_train, 8)
        # print(outputs.type())
        # _ -- the maximum value, pred -- the index of the maximum value
        _, pred = torch.max(outputs.data, 1)
        # print(pred.type())
        optimizer.zero_grad()
        loss = cost(outputs, y_train)
        # print(loss.type())
# loss.backward() # # w1.data -= learning_rate * w1.grad.data # w2.data -= learning_rate * w2.grad.data # # w1.grad.data.zero_() # w2.grad.data.zero_() #使用torch.nn包来搭建 #这里的Input_data与output_data都是数据的维度 batch_n = 100 input_data = 1000 hidden_layer = 100 output_data = 10 x = variable(torch.randn(batch_n, input_data), requires_grad=False) y = variable(torch.randn(batch_n, output_data), requires_grad=False) #权值 #w1 = variable(torch.randn(input_data,hidden_layer),requires_grad=True) #w2 = variable(torch.randn(hidden_layer,output_data),requires_grad=True) models = torch.nn.Sequential( #输入层到隐藏层的线性变换 torch.nn.Linear(input_data, hidden_layer), #激活函数 torch.nn.ReLU(), #隐藏层到输出层的线性变换 torch.nn.Linear(hidden_layer, output_data)) print(models) learning_rate = 1e-3
return output.sum() / dim return output / dim loss_reference_fns = { 'KLDivLoss': kldivloss_reference, 'NLLLoss': nllloss_reference, 'NLLLossNd': nlllossNd_reference, 'SmoothL1Loss': smoothl1loss_reference, 'MultiLabelMarginLoss': multilabelmarginloss_reference, 'HingeEmbeddingLoss': hingeembeddingloss_reference, 'SoftMarginLoss': softmarginloss_reference, 'MultiMarginLoss': multimarginloss_reference, } sample_scalar = variable(0) # TODO: replace this with torch.rand() when Variables and tensors are merged; # this function will correctly handle scalars (i.e. empty tuple sizes) for now. def torch_rand(sizes, requires_grad=False): if len(sizes) == 0: return torch.testing.rand_like(sample_scalar, requires_grad=requires_grad) else: return Variable(torch.rand(*sizes), requires_grad=requires_grad) # TODO: replace this with torch.randn() when Variables and tensors are merged; # this function will correctly handle scalars (i.e. empty tuple sizes) for now. def torch_randn(sizes, requires_grad=False):
#Training the network criterion = nn.BCELoss() optimD = optim.Adam(netD.parameters(),lr = 0.0002,betas = (0.5,0.999)) optimG = optim.Adam(netG.parameters(),lr = 0.0002,betas = (0.5,0.999)) for epoch in range(25): for i,data in enumerate(dataloader,start = 0): #1st step updating the weights of the neural network of the discriminator netD.zero_grad() #train the discriminator with real image real,_ = data #we dont want the labels here so thats why we are setting it to _ input = variable(real) #to convert the real image into a torch variable target = variable(torch.ones(input.size()[0])) #we have to set targets to 1 as we are training the real image, so basically we have to create a torch array type of structure which will have 1's with the dimension equal to the minibatch size of real images output = netD(input) #forward pass through discriminator errD_real =criterion(output,target) #calc the real_errD loss #train the discriminator with fake image noise = variable(torch.randn(input.size()[0],100, 1, 1)) #1,1 stands for the dimension of each value fake = netG(noise) #forward pass through generator to generate the fake images target = variable(torch.zeros(input.size()[0])) #we have to set targets to 0 as we are training the fake image, so basically we have to create a torch array type of structure which will have 0's with the dimension equal to the minibatch size of real images output = netD(fake.detach()) #to remove the detach the gradients , so that no gradients are backpropagated through this variable errD_fake = criterion(output,target) #Backpropagation the total error errD = errD_real + errD_fake errD.backward() optimD.step() #Applies the optimizer on the Neural network and updates the weights of the discriminator depending on how much it is responsible for total loss error
# Hyper-parameters.
BATCH_SIZE = 64
TIME_STEP = 6
INPUT_SIZE = 7
LR = 0.01

# Use the GPU whenever CUDA is available.
if torch.cuda.is_available():
    USE_GPU = True
else:
    USE_GPU = False

if __name__ == '__main__':
    dataset = connect4(numerical=True, one_hot=False)
    # Only the third return value is kept for the training split; the first
    # two are kept for the test split.
    _, _, train_dataset_num = data_label_num(dataset.train_x, dataset.train_y)
    test_data, test_label, _ = data_label_num(dataset.test_x, dataset.test_y)

    # Wrap the test set, moving it to the GPU when enabled.
    if USE_GPU:
        test_data_x = variable(test_data).cuda()
        test_data_y = variable(test_label).cuda()
    else:
        test_data_x = variable(test_data)
        test_data_y = variable(test_label)

    rnn = RNN()
    if USE_GPU:
        rnn.cuda()
    optimizer = torch.optim.Adam(rnn.parameters(), lr=LR)
    loss_func = nn.CrossEntropyLoss()
    macro_f1_score = []

    # 5-fold split over the training data with a fixed shuffle seed.
    kf = KFold(n_splits=5, shuffle=True, random_state=2020)
    for train_index, valid_index in kf.split(train_dataset_num):
        # Training part of this fold: every row except the validation rows.
        train_data = pd.DataFrame(train_dataset_num.copy()).drop(valid_index)
train_loader = DataLoader(train, batch_size=100, shuffle=True) # In[88]: #모형학습 #오차함수 객체 criterion = nn.CrossEntropyLoss() #최적화를 담당할 객체 optimizer = optim.SGD(model.parameters(), lr=0.01) #학습시작 for epoch in range(1000): total_loss = 0 #분할해 둔 데이터 꺼내오기 for train_x, train_y in train_loader: #계산 그래프 구성 train_x, train_y = variable(train_x), variable(train_y) #경사초기화 optimizer.zero_grad() #순전파 계산 output = model(train_x) #오차계산 loss = criterion(output, train_y) #역전파계산 loss.backward() #가중치 업데이트 optimizer.step() #누적 오차 계산 total_loss += loss.item() if (epoch + 1) % 100 == 0: print(epoch + 1, total_loss) #100회 반복마다 누적 오차 출력
def nll_loss_helper(input, target, weight, ignore_index):
    """Return ``(loss, norm)`` for a single target index.

    ``norm`` is ``weight[target]`` (or 1 when ``weight`` is ``None``) and the
    loss is ``-input[target] * norm``. A target equal to ``ignore_index``
    contributes a pair of zero variables instead.
    """
    if target == ignore_index:
        return (variable(0), variable(0))
    if weight is None:
        norm = 1
    else:
        norm = weight[target]
    return (-input[target] * norm, norm)
def torch_rand(sizes):
    """Return uniform random values of shape ``sizes``; empty ``sizes`` gives a scalar."""
    if len(sizes) != 0:
        return Variable(torch.rand(*sizes))
    # Empty size: fill a scalar in place with uniform noise.
    return variable(0).uniform_()
def torch_randn(sizes):
    """Return standard-normal random values of shape ``sizes``; empty ``sizes`` gives a scalar."""
    if len(sizes) != 0:
        return Variable(torch.randn(*sizes))
    # Empty size: fill a scalar in place with normal noise.
    return variable(0).normal_()