def get_sequence_from_user(max_sequence_length: int) -> Tuple[Tensor, Tensor]:
    """
    Prompt for a whitespace-separated list of token ids on standard input
    and turn it into the pair of tensors fed to the model.

    The ids are truncated to `max_sequence_length` when too long and
    right-padded with zeros when too short. Returns the token tensor, of
    shape (1, max_sequence_length), and the mask tensor, of shape
    (1, 1, max_sequence_length), holding ones on real tokens and zeros on
    padding. Both tensors are of integer (long) type.
    """
    prompt = (
        "\nEnter the desired source sequence token ids separated by spaces: ")

    # reading the raw answer and parsing every chunk as an integer id:
    token_ids = [int(chunk) for chunk in input(prompt).split()]

    # keeping at most the allowed number of tokens:
    n_tokens = len(token_ids)
    if n_tokens > max_sequence_length:
        token_ids = token_ids[:max_sequence_length]
        n_tokens = max_sequence_length
    n_padding = max_sequence_length - n_tokens

    # tokens followed by zero padding, with a leading mini-batch axis:
    real_part = tensor(token_ids, dtype=torch_long)  # noqa: E501 pylint: disable=not-callable
    padding_part = torch_zeros(n_padding, dtype=torch_long)
    src_seq = torch_unsqueeze(
        input=torch_cat((real_part, padding_part), dim=-1),
        dim=0)

    # mask mirroring the padding just applied:
    attended = torch_ones((1, 1, n_tokens), dtype=torch_long)
    ignored = torch_zeros((1, 1, n_padding), dtype=torch_long)
    src_seq_mask = torch_cat((attended, ignored), dim=-1)

    return src_seq, src_seq_mask
def stft(signal, len_each_section, frac_overlap, padding, window=None):
    """Compute a rectangular-window short-time Fourier transform.

    Parameters
    ----------
    signal : torch.Tensor or numpy.ndarray
        Three-dimensional input unpacked as ``(y, num_elements, num_beams)``;
        the transform runs along the first axis. Non-tensor input is
        converted with ``torch.from_numpy``. The data is cast to double
        before the transform.
    len_each_section : int
        Length of each section, used as the FFT size.
    frac_overlap : float
        Fraction of overlap between consecutive sections. The hop length is
        ``round(len_each_section * (1 - frac_overlap))``.
    padding : int
        Only used to build the ``'freqs'`` axis (``arange(padding) / padding``)
        and echoed back in the result.
    window : None
        Only ``None`` (an all-ones window) is supported.

    Returns
    -------
    dict
        ``'stft'`` holds a float tensor viewed as
        ``(len_each_section, num_frames, num_elements, num_beams, 2)``.
        ``'origSigSize'`` holds the input shape, which ``istft`` needs to
        restore the original length. The remaining keys echo the settings
        used (``'freqs'``, ``'startOffsets'``, ``'len_each_section'``,
        ``'padding'``, ``'win_info'``, ``'frac_overlap'``, ``'shift_length'``).

    Raises
    ------
    NotImplementedError
        If `window` is not ``None``.
    """
    # Explicit type test: an ``assert`` is removed under ``python -O`` and a
    # bare ``except`` would also hide unrelated failures.
    if not torch_is_tensor(signal):
        signal = torch_from_numpy(signal).double()

    if not signal.is_contiguous():
        LOGGER.debug('stft: signal is not contiguous')
        signal = signal.contiguous()

    if window is None:
        window = torch_ones(len_each_section, dtype=torch_float64)
    else:
        raise NotImplementedError(
            'stft: window function {} has not been implemented'.format(window))

    shift_length = round(len_each_section * (1. - frac_overlap))

    y, num_elements, num_beams = signal.shape
    # Remembered for `istft`, which unpacks this key to recover the length.
    orig_sig_size = (y, num_elements, num_beams)

    num_frames = math_ceil((y - len_each_section + 1) / shift_length)
    startLocs = torch_arange(0, num_frames * shift_length, shift_length)

    num_elements_beams = num_elements * num_beams
    freq = torch_arange(padding) / padding

    # The transform is computed in double precision and recast to float.
    # NOTE(review): the permute/view below assume torch.stft returns a real
    # tensor with a trailing (real, imag) axis, i.e. the legacy API without
    # `return_complex` - confirm against the pinned torch version.
    signal = signal.double()
    signal_stft = torch_stft(
        signal.view(y, num_elements_beams).permute(1, 0),
        len_each_section,
        window=window,
        hop_length=shift_length,
        center=False,
        onesided=False,
        normalized=False,
        pad_mode='constant') \
        .float() \
        .permute(1, 2, 0, 3) \
        .view(len_each_section, num_frames, num_elements, num_beams, 2)
    del signal

    return {
        'stft': signal_stft,
        'freqs': freq,
        'startOffsets': startLocs,
        'len_each_section': len_each_section,
        'padding': padding,
        'win_info': window,
        'frac_overlap': frac_overlap,
        'shift_length': shift_length,
        'origSigSize': orig_sig_size,
    }
def istft(stft_object):
    """Invert a short-time Fourier transform stored in a result dictionary.

    Parameters
    ----------
    stft_object : dict
        Must provide ``'stft'`` (a 5-D tensor unpacked as
        ``(len_each_section, num_frames, num_elements, num_beams, real_imag)``),
        ``'shift_length'`` (the hop length) and ``'origSigSize'`` (a
        three-item sequence whose first entry is the output length).

    Returns
    -------
    torch.Tensor
        Float tensor arranged as ``(y, num_elements, num_beams)``.
    """
    spectrum = stft_object['stft']
    assert torch_is_tensor(spectrum)

    section_length, num_frames, num_elements, num_beams, real_imag = \
        spectrum.size()
    hop_length = stft_object['shift_length']
    y, _, _ = stft_object['origSigSize']

    # All-ones (rectangular) analysis window.
    rectangular_window = torch_ones(section_length, dtype=torch_float64)

    # The inverse transform runs in double precision on a
    # (batch, frequency, frame, real/imag) layout, where the batch axis
    # merges elements and beams.
    batched_spectrum = spectrum.double() \
        .view(section_length, num_frames, num_elements * num_beams, real_imag) \
        .permute(2, 0, 1, 3)

    restored = torch_istft(batched_spectrum,
                           section_length,
                           window=rectangular_window,
                           hop_length=hop_length,
                           center=False,
                           onesided=False,
                           normalized=False,
                           pad_mode='constant',
                           length=y)

    # Back to single precision, then un-merge the batch axis and put the
    # sample axis first.
    return restored.float().view(num_elements, num_beams, y).permute(2, 0, 1)
def expand(image, boxes, filler):
    """Place an image at a random spot on a larger, filler-coloured canvas.

    The canvas is between 1x and 1.5x the size of the image (one random
    scale applied to both height and width) and always has three channels.
    The boxes are shifted by the offset at which the image was pasted.

    :param image: image tensor indexed as (channels, height, width)
    :param boxes: boxes as rows of four coordinates; the shift added is
        (left, top, left, top)
    :param filler: per-channel fill values for the canvas
    :return: the enlarged image and the shifted boxes
    """
    original_h, original_w = image.size(1), image.size(2)

    # One random zoom-out factor shared by both spatial dimensions.
    max_scale = 1.5
    scale = rand_uniform(1, max_scale)
    new_h, new_w = int(scale * original_h), int(scale * original_w)

    # Canvas filled channel-wise with the filler values.
    fill_values = FloatTensor(filler).unsqueeze(1).unsqueeze(1)
    canvas = torch_ones((3, new_h, new_w), dtype=torch_float) * fill_values

    # Random top-left corner that keeps the image fully inside the canvas.
    left = randint(0, new_w - original_w)
    top = randint(0, new_h - original_h)
    canvas[:, top:top + original_h, left:left + original_w] = image

    # Boxes move by the same offset as the image.
    offset = FloatTensor([left, top, left, top]).unsqueeze(0)
    return canvas, boxes + offset
def __init__(self, feature_dimension: int, epsilon: float = 1e-6) -> None:
    """
    Create the two learnable vectors of the layer, both of length
    `feature_dimension`: `alpha`, starting at ones, and `beta`, starting
    at zeros. `epsilon` is stored unchanged on the instance.
    """
    super(LayerNorm, self).__init__()
    self.epsilon = epsilon
    # registration order (alpha, then beta) is kept as-is:
    initial_alpha = torch_ones(feature_dimension)
    initial_beta = torch_zeros(feature_dimension)
    self.alpha = Parameter(data=initial_alpha)
    self.beta = Parameter(data=initial_beta)
n_encoder_blocks=6, n_decoder_blocks=6, representation_dimension=512, feedforward_dimension=2048, n_attention_heads=8, max_sequence_length=MAX_SEQUENCE_LENGTH, dropout_prob=0.1) # ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ # evaluating a single prediction before training: print("\nEvaluating a single prediction before training:") src_sequence = torch_unsqueeze( input=tensor(list(range(1, MAX_SEQUENCE_LENGTH + 1))), # noqa: E501 pylint: disable=not-callable # input=tensor([2] * MAX_SEQUENCE_LENGTH), # TODO # noqa: E501 pylint: disable=not-callable dim=0) src_sequence_mask = torch_ones((1, 1, MAX_SEQUENCE_LENGTH)) tgt_sequence_prediction = model.predict(src_sequences=src_sequence, src_masks=src_sequence_mask, tgt_bos_token=1, decoding_method='greedy', gpu_if_possible=True) print_src_vs_tgt(src_seq=src_sequence[0], tgt_seq_prediction=tgt_sequence_prediction[0]) # ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ # training the model: print("\nTraining the model:") model.train_on_toy_copy_task(n_epochs=10, samples_per_epoch=30 * 20, mini_batch_size=30, label_smoothing_factor=0.0,
def _hyperrectangle_integration(lower,
                                upper,
                                correlation,
                                maxpts,
                                abseps,
                                releps,
                                info=False):
    """Integrate over a hyperrectangle through ``_parallel_genz_bretz``.

    Parameters
    ----------
    lower, upper : torch.Tensor or None
        Integration bounds. ``None`` means no bound on that side. When only
        `upper` is ``None``, the problem is rewritten with ``upper = -lower``.
        Both are converted with ``.numpy()`` when used.
    correlation : torch.Tensor
        Its last dimension gives ``d``; the strictly lower triangle is
        extracted through ``.numpy()``. Its dtype and device are reused for
        the returned tensors.
    maxpts, abseps, releps
        Forwarded unchanged to ``_parallel_genz_bretz``.
    info : bool
        If true, a ``(values, errors, infos)`` triple is returned instead of
        the values alone.

    Returns
    -------
    torch.Tensor or tuple of torch.Tensor
        Values viewed with the broadcast batch shape. With ``info=True``,
        also the errors and ``int32`` info codes. These are zeros in the
        two shortcut cases (both bounds ``None``, or ``d == 1``).

    Notes
    -----
    ``batch_shape`` is concatenated with lists below, so
    ``broadcast_shape`` presumably returns a list - TODO confirm.
    """
    # main differences with _parallel_CDF is that it handles complex lower/upper bounds
    # with infinite components and it returns information on the completion.
    d = correlation.size(-1)
    trind = tril_indices(d, -1)
    ### Infer batch_shape
    lnone = lower is None
    unone = upper is None
    bothNone = lnone and unone
    if bothNone:
        pre_batch_shape = []
    # broadcast lower and upper to get pre_batch_shape
    elif not lnone and not unone:
        pre_batch_shape = broadcast(lower, upper).shape[:-1]
        cdf = False
    else:
        # case where we compute P(Y<x): lower is [-inf, ..., -inf] and upper = x.
        # Invert lower and upper if it is upper=None
        cdf = True
        if unone:
            upper = -lower
        pre_batch_shape = upper.shape[:-1]
    cor = ascontiguousarray(correlation.numpy()[..., trind[0], trind[1]],
                            dtype=float64)
    # broadcast all lower, upper, correlation
    batch_shape = broadcast_shape(pre_batch_shape, cor.shape[:-1])
    dtype = correlation.dtype
    device = correlation.device
    if bothNone:  # trivial case
        if info:
            return (torch_ones(*batch_shape, dtype=dtype, device=device),
                    torch_zeros(*batch_shape, dtype=dtype, device=device),
                    torch_zeros(*batch_shape,
                                dtype=torch_int32,
                                device=device))
        else:
            return torch_ones(*batch_shape, dtype=dtype, device=device)
    else:
        if d == 1:
            # one-dimensional case: closed form through Phi, no integration
            val = Phi(upper.squeeze(-1)) if cdf else Phi(
                upper.squeeze(-1)) - Phi(lower.squeeze(-1))
            if info:
                return (val,
                        torch_zeros(*batch_shape, dtype=dtype, device=device),
                        torch_zeros(*batch_shape,
                                    dtype=torch_int32,
                                    device=device))
            else:
                return val
        dd = d * (d - 1) // 2  # size of flatten correlation matrix
        # Broadcast:
        c = broadcast_to(cor, batch_shape + [dd]).reshape(-1, dd)
        N = c.shape[0]  # batch number
        upp = upper.numpy().astype(float64)
        shape1 = batch_shape + [d]
        u = broadcast_to(upp, shape1).reshape(N, d)
        infu = u == Inf
        if cdf:
            l = np_empty(
                (N, d),
                dtype=float64)  # never used but required by Fortran code
            i = np_zeros((N, d), dtype=int32)
            i.setflags(write=1)
            i[infu] = -1  # basically ignores these components
        else:
            low = lower.numpy().astype(float64)
            l = broadcast_to(low, shape1).reshape(N, d)
            i = full((N, d), 2, dtype=int32)
            infl = l == -Inf
            i.setflags(write=1)
            i[infl] = 0
            i[infu] = 1
            i[infl * infu] = -1  # basically ignores these components
        # infin is an int vector to pass to the fortran code controlling the integral limits
        #            if INFIN(I) < 0, Ith limits are (-infinity, infinity);
        #            if INFIN(I) = 0, Ith limits are (-infinity, UPPER(I)];
        #            if INFIN(I) = 1, Ith limits are [LOWER(I), infinity);
        #            if INFIN(I) = 2, Ith limits are [LOWER(I), UPPER(I)].
        # TODO better to build res and assign or build-reshape?
        res = _parallel_genz_bretz(l, u, i, c, maxpts, abseps, releps, info)
        if info:
            values, errors, infos = res
            return (tensor(values, dtype=dtype,
                           device=device).view(batch_shape),
                    tensor(errors, dtype=dtype,
                           device=device).view(batch_shape),
                    tensor(infos, dtype=torch_int32,
                           device=device).view(batch_shape))
        else:
            return tensor(res, dtype=dtype, device=device).view(batch_shape)
    # NOTE(review): the triple-quoted text opened below looks like disabled
    # legacy code; it continues past the end of this excerpt.
    """
    l.setflags(write=1)
def _build_targets(self, predictions, target_data, feature_map_width,
                   feature_map_height):
    """Build the object/no-object masks and the regression targets.

    `target_data` is read per image as consecutive groups of five values
    per object. Offsets 1 to 4 of a group are used as centre x, centre y,
    width and height, and are multiplied by the feature-map size. A value
    of -1 at offset 1 ends the object list of that image. `predictions` is
    indexed as a flat list of boxes ordered image, then anchor, then row,
    then column.

    Returns eight tensors, all shaped
    (batch, num_anchors, feature_map_height, feature_map_width): the
    object mask, the no-object mask, then the target centre x, centre y,
    width, height, confidence and class values.
    """
    batch_size = target_data.size(0)
    pixels_per_map = feature_map_height * feature_map_width
    boxes_per_image = self.num_anchors * pixels_per_map
    grid_shape = (batch_size, self.num_anchors, feature_map_height,
                  feature_map_width)

    _1obj = torch_zeros(*grid_shape)
    _1noobj = torch_ones(*grid_shape)
    (target_center_x_values, target_center_y_values, target_width_values,
     target_height_values, target_confidence_score_values,
     target_class_values) = (torch_zeros(*grid_shape) for _ in range(6))

    # First pass: clear the no-object mask wherever a predicted box already
    # overlaps some ground-truth box by more than the ignore threshold.
    for image_index in range(batch_size):
        image_targets = target_data[image_index]
        first_box = image_index * boxes_per_image
        image_predictions = predictions[first_box:first_box +
                                        boxes_per_image].t()
        best_ious = torch_zeros(boxes_per_image)
        for t in range(self.max_object):
            base = t * 5
            if image_targets[base + 1] == -1:
                break
            ground_truth = FloatTensor([
                image_targets[base + 1] * feature_map_width,
                image_targets[base + 2] * feature_map_height,
                image_targets[base + 3] * feature_map_width,
                image_targets[base + 4] * feature_map_height,
            ]).repeat(boxes_per_image, 1).t()
            overlaps = intersection_over_union(True,
                                               image_predictions,
                                               ground_truth,
                                               is_corner_coordinates=False)
            best_ious = torch_max(best_ious, overlaps)
        # https://github.com/marvis/pytorch-yolo2/issues/121#issuecomment-436388664
        ious_on_grid = torch_reshape(
            best_ious,
            (self.num_anchors, feature_map_height, feature_map_width))
        _1noobj[image_index][ious_on_grid > self.ignore_threshold] = 0

    # Second pass: assign every ground-truth object to its best anchor and
    # cell, and record the targets there.
    for image_index in range(batch_size):
        image_offset = image_index * boxes_per_image
        for t in range(self.max_object):
            if target_data[image_index][t * 5 + 1] == -1:
                break
            anchor_index, ground_truth_width, ground_truth_height = \
                self._find_most_matching_anchor(feature_map_width,
                                                feature_map_height,
                                                image_index, t, target_data)
            cell_x, cell_y, ground_truth_box = \
                self._compose_ground_truth_data(feature_map_width,
                                                feature_map_height,
                                                ground_truth_height,
                                                ground_truth_width,
                                                image_index, t, target_data)

            predicted_box = predictions[image_offset +
                                        anchor_index * pixels_per_map +
                                        cell_y * feature_map_width + cell_x]
            iou = intersection_over_union(False,
                                          ground_truth_box,
                                          predicted_box,
                                          is_corner_coordinates=False)

            _1obj[image_index][anchor_index][cell_y][cell_x] = 1
            _1noobj[image_index][anchor_index][cell_y][cell_x] = 0

            (target_center_x_values, target_center_y_values,
             target_width_values, target_height_values,
             target_confidence_score_values,
             target_class_values) = self._set_target_values(
                 feature_map_width, feature_map_height, image_index, t,
                 target_data, anchor_index, iou, cell_x, cell_y,
                 ground_truth_height, ground_truth_width,
                 target_center_x_values, target_center_y_values,
                 target_class_values, target_confidence_score_values,
                 target_height_values, target_width_values)

    return (_1obj, _1noobj, target_center_x_values, target_center_y_values,
            target_width_values, target_height_values,
            target_confidence_score_values, target_class_values)
def predict(  # pylint: disable=too-many-arguments
        self,
        src_sequences: Tensor,
        src_masks: Tensor,
        tgt_bos_token: int,
        decoding_method: str = 'greedy',
        gpu_if_possible: bool = True) -> Tensor:
    """
    Predict target token sequences from source token sequences.

    The model and the inputs are moved to the selected device and the model
    is switched to evaluation mode. With greedy decoding, every sequence of
    the mini-batch starts from `tgt_bos_token` and is extended one token at
    a time with the most likely next token, until it holds
    `self.max_sequence_length` tokens. The returned tensor has
    dimensionality (samples, tokens) and the same type as `src_sequences`.

    Any decoding method other than 'greedy' raises NotImplementedError.
    """
    # selecting the device handling computations:
    device = select_device(gpu_if_possible=gpu_if_possible)

    # moving model parameters and buffers to such device:
    self.model.to(device)

    # moving inputs to such device:
    src_sequences = src_sequences.to(device)
    src_masks = src_masks.to(device)

    # switching to inference mode:
    self.model.eval()

    if decoding_method == 'greedy':

        # greedy decoding:

        # computing encoder outputs, i.e. encoded representations of
        # source tokens - from dimensionality (samples, tokens) to
        # dimensionality (samples, tokens, features):
        src_encoded_tokens = self.model.encode(src_tokens=src_sequences,
                                               src_mask=src_masks)

        # initializing predicted output sequences with one beginning-of-
        # sequence token per sample - dimensionality (samples, 1) - so that
        # mini-batches of any size are decoded, not only single samples:
        n_samples = src_sequences.size(0)
        cumulative_tgt_sequences = torch_ones(
            (n_samples, 1), requires_grad=False)\
            .fill_(value=tgt_bos_token).type_as(src_sequences)

        # for each target position, the respective token is sequentially
        # predicted, given the decoder auto-regressive predictive nature -
        # for all sequences at the same time:
        for _ in range(self.max_sequence_length - 1):

            # computing logits - from dimensionality (samples, tokens,
            # features) to dimensionality (samples, tokens, features):
            next_token_logits = self.model.decode(
                src_encoded_tokens=src_encoded_tokens,
                src_mask=src_masks,
                tgt_tokens=cumulative_tgt_sequences,
                tgt_mask=allowed_positions_to_attend(
                    # positions to attend equal computed target tokens:
                    n_positions=cumulative_tgt_sequences.size(1)).to(
                        device))

            # turning the logits of next (last) tokens in the sequences
            # into log-probabilities - from dimensionality (samples,
            # tokens, features) to dimensionality (samples, features):
            next_token_log_probabilities = self.model.log_softmax_layer(
                next_token_logits[:, -1]  # next (last) tokens
            )

            # discretizing probabilities to predicted tokens - from
            # dimensionality (samples, features) to dimensionality
            # (samples), keeping one prediction per sample:
            next_tokens = torch_max(next_token_log_probabilities,
                                    dim=1).indices

            # concatenating the newly predicted tokens, reshaped to
            # dimensionality (samples, 1), to the sequences of already
            # predicted tokens:
            cumulative_tgt_sequences = torch_cat(
                (cumulative_tgt_sequences,
                 next_tokens.unsqueeze(1).type_as(cumulative_tgt_sequences)),
                dim=1)

        # TODO: truncate the different predicted sequences in the
        # mini-batch from their respective first padding token on

        return cumulative_tgt_sequences

    raise NotImplementedError("Unavailable decoding method: " +
                              decoding_method)
def multivariate_normal_cdf(lower=None,
                            upper=None,
                            loc=None,
                            covariance_matrix=None,
                            scale_tril=None,
                            method="GenzBretz",
                            nmc=200,
                            maxpts=25000,
                            abseps=0.001,
                            releps=0,
                            error_info=False):
    """Compute rectangle probabilities for a multivariate normal random vector Z
    ``P(l_i < Z_i < u_i, i = 1,...,d)``.

    Probability values can be returned with closed-form backward derivatives.

    Parameters
    ----------
    lower : torch.Tensor, int or float, optional
        Lower integration limits. Can have batch shape. The last dimension
        is the dimension of the random vector. Default is ``None`` which is
        understood as minus infinity for all components. Values
        ``- numpy.Inf`` are supported, e.g. if only few components have an
        infinite boundary. A Python scalar is expanded to a vector with
        that value in every component.
    upper : torch.Tensor, int or float, optional
        Upper integration limits. See `lower`.
    loc : torch.Tensor, optional
        Mean of the Gaussian vector. Default is zeros.
    covariance_matrix : torch.Tensor, optional
        Covariance matrix of the Gaussian vector.
        Must be provided if `scale_tril` is not.
    scale_tril : torch.Tensor, optional
        A lower triangular root of the covariance matrix of the Gaussian
        vector (e.g. a Cholesky factor). Must be provided if
        `covariance_matrix` is not. The method ``'GenzBretz'`` needs the
        covariance matrix and it will be computed from `scale_tril`.
    method : str, optional
        Method deployed for the integration. Either ``'MonteCarlo'`` or
        ``'GenzBretz'``.
    nmc : int, optional
        Number of Monte Carlo samples. It is rounded up to the next
        multiple of 5.
    maxpts : int, optional
        Maximum number of integration points in the Fortran routine.
    abseps : float, optional
        Absolute error tolerance.
    releps : float, optional
        Relative error tolerance.
    error_info : bool, optional
        Should an estimation of the integration error be returned.
        Not compatible with autograd.

    Returns
    -------
    value : torch.Tensor
        The probability of the event ``lower < Y < upper``, with ``Y`` a
        Gaussian vector defined by `loc` and `covariance_matrix` (or
        `scale_tril`). Closed form derivatives are implemented if `lower`,
        `upper`, `loc`, `covariance_matrix` or `scale_tril` require a
        gradient.
    error : torch.Tensor
        The estimated error for each component of `value`.
        **Returned only if** `error_info` is ``True``.
    info : torch.Tensor
        Tensor of type ``int32`` informing on the execution for each
        component.

        - If ``0``, normal completion with ``error < abseps``
        - If ``1``, completion with ``error > abseps`` and (for
          ``method = 'GenzBretz'``) all maxpts evaluation budget is depleted.
        - If ``2``, N > 1000 or N < 1 (only for ``method = 'GenzBretz'``)
        - If ``3``, `covariance_matrix` is not positive semi-definite
          (only for ``method = 'GenzBretz'``)

        **Returned only if** `error_info` is ``True``.

    Raises
    ------
    ValueError
        If not exactly one of `covariance_matrix` and `scale_tril` is
        given, if `method` is unknown, if the dimension exceeds 1000 with
        ``'GenzBretz'``, or if autograd is requested with ``'GenzBretz'``
        together with `error_info` or with two finite-sided bounds.

    Notes
    -----
    Parameters `lower`, `upper` and `covariance_matrix` (or `scale_tril`),
    as well as the returns `value`, `error` and `info` are broadcasted to
    their common batch shape. See PyTorch'
    `broadcasting semantics <https://pytorch.org/docs/stable/notes/broadcasting.html#broadcasting-semantics>`_.
    If any component of ``lower - upper`` is nonnegative (empty integration
    box), the function returns a null tensor with consistent shape.

    Method ``MonteCarlo`` uses Monte Carlo sampling for estimating `value`,
    whereas ``method = 'GenzBretz'`` calls a Fortran routine [1]_. If
    `method` is ``MonteCarlo`` a Cholesky decomposition of the covariance
    matrix will be performed. Else if ``GenzBretz``, only the correlation
    matrix is computed and passed to the Fortran routine.

    The parameter `maxpts` can be used to limit the time. A suggested
    calibration strategy is to start with 1000 times the integration
    dimension, and then increase it if the returned `error` is too large.

    Partial derivatives are computed using non-trivial closed form formula,
    see e.g. Marmin et al. [2]_, p 13.

    References
    ----------
    .. [1] Alan Genz and Frank Bretz, "Comparison of Methods for the
       Computation of Multivariate t-Probabilities", Journal of
       Computational and Graphical Statistics 11, pp. 950-971, 2002.
       `Source code <http://www.math.wsu.edu/faculty/genz/software/fort77/mvtdstpack.f>`_.

    .. [2] Sébastien Marmin, Clément Chevalier and David Ginsbourger,
       "Differentiating the multipoint Expected Improvement for optimal
       batch design", International Workshop on Machine learning,
       Optimization and big Data, Taormina, Italy, 2015.
       `PDF <https://hal.archives-ouvertes.fr/hal-01133220v4/document>`_.

    Examples
    --------
    >>> import torch
    >>> from torch.autograd import grad
    >>> n = 4
    >>> x = 1 + torch.randn(n)
    >>> x.requires_grad = True
    >>> # Make a positive semi-definite matrix
    >>> A = torch.randn(n,n)
    >>> C = 1/n*torch.matmul(A,A.t())
    >>> p = mvnorm.multivariate_normal_cdf(upper=x,covariance_matrix=C)
    >>> p
    tensor(0.3721, grad_fn=<MultivariateNormalCDFBackward>)
    >>> grad(p,(x,))
    (tensor([0.0085, 0.2510, 0.1272, 0.0332]),)
    """
    if (covariance_matrix is not None) + (scale_tril is not None) != 1:
        raise ValueError(
            "Exactly one of covariance_matrix or scale_tril may be specified.")
    mat = scale_tril if covariance_matrix is None else covariance_matrix
    device, dtype = mat.device, mat.dtype
    d = mat.size(-1)

    # Scalar bounds are expanded to one value per component. `new_full` is
    # an instance method, so it is called on `mat`, which also provides the
    # dtype and device of the new tensor.
    if isinstance(lower, (int, float)):
        lower = mat.new_full((d, ), float(lower))
    if isinstance(upper, (int, float)):
        upper = mat.new_full((d, ), float(upper))

    # A bound that is infinite in every component is the same as no bound.
    lnone = lower is None
    unone = upper is None
    if not lnone and lower.max() == -Inf:
        lower = None
        lnone = True
    if not unone and upper.min() == Inf:
        upper = None
        unone = True

    if method == "MonteCarlo":  # Monte Carlo estimation
        if loc is None:
            loc = torch_zeros(d, device=device, dtype=dtype)
        p = MultivariateNormal(loc=loc,
                               scale_tril=scale_tril,
                               covariance_matrix=covariance_matrix)
        r = nmc % 5
        N = nmc if r == 0 else nmc + 5 - r  # rounded to the upper multiple of 5
        Y = p.sample(Size([N]))
        if lnone and unone:
            error = torch_zeros(p.batch_shape, device=device,
                                dtype=dtype) if error_info else -1
            info = torch_zeros(p.batch_shape, device=device,
                               dtype=int32) if error_info else -1
            value = torch_ones(p.batch_shape, device=device, dtype=dtype)
        else:
            # Z flags, per sample, whether all components fall in the box.
            if lnone:
                Z = (Y < upper).prod(-1)
            elif unone:
                Z = (Y > lower).prod(-1)
            else:
                Z = (Y < upper).prod(-1) * (Y > lower).prod(-1)
            if error_info:  # Does NOT slow down significantly
                # divide in 5 groups to have an idea of the precision
                booleans = Z.view(N // 5, 5, *Z.shape[1:])
                values = booleans.sum(0).type(dtype) / N * 5
                value = values.mean(0)
                std = values.var(0).sqrt()
                error = 1.96 * std / sqrt5  # at 95 %
                info = (error > abseps).type(int32)
            else:
                value = Z.sum(0).type(dtype) / N
                error = info = -1

    elif method == "GenzBretz":  # Fortran routine
        if d > 1000:
            raise ValueError("Only dimensions below 1000 are allowed. Got " +
                             str(d) + ".")
        # centralize the problem
        uppe = upper if loc is None else None if unone else upper - loc
        lowe = lower if loc is None else None if lnone else lower - loc
        c = matmul(scale_tril, scale_tril.transpose(
            -1, -2)) if covariance_matrix is None else covariance_matrix
        if (not unone and uppe.requires_grad) or (
                not lnone and lowe.requires_grad) or mat.requires_grad:
            # Differentiable path: only one-sided problems are supported.
            if error_info:
                raise ValueError(
                    "Option 'error_info' is True, and one of x, loc, "
                    "covariance_matrix or scale_tril requires gradient. "
                    "With option 'GenzBretz', the estimation of CDF error "
                    "is not compatible with autograd.")
            error = info = -1
            if lnone:
                upp = uppe
            elif unone:
                upp = -lowe
            else:
                raise ValueError(
                    "For autograd with option 'GenzBretz', at least lower or "
                    "upper should be None (or with all components infinite).")
            value = CDFapp(upp, c, maxpts, abseps, releps)
        else:
            if lnone and unone:
                value = torch_ones(c.shape[:-2], device=device, dtype=dtype)
                error = torch_zeros(c.shape[:-2], device=device,
                                    dtype=dtype) if error_info else -1
                info = torch_zeros(c.shape[:-2], device=device,
                                   dtype=int32) if error_info else -1
            else:
                stds = diagonal(c, dim1=-2, dim2=-1).sqrt()
                low, upp, corr = _cov2cor(lowe, uppe, c, stds)
                res = _hyperrectangle_integration(low,
                                                  upp,
                                                  corr,
                                                  maxpts,
                                                  abseps,
                                                  releps,
                                                  info=error_info)
                value, error, info = (res if error_info else (res, -1, -1))
    else:
        raise ValueError(
            "The 'method=' should be either 'GenzBretz' or 'MonteCarlo'.")

    #if error_info and error > abseps:
    #    warn("Estimated error is higher than abseps. Consider raising the computation budget (nmc for method='MonteCarlo' or maxpts for 'GenzBretz'). Switch 'error_info' to False to ignore.")
    if error_info:
        return value, error, info
    else:
        return value