def padding(nframes, x, y):
    """Stack every frame of ``x`` with its neighbours into one wide row.

    Dirty, hacky padding: row ``i`` of the result is the concatenation of
    frames ``i - ba .. i + ba`` (``ba = (nframes - 1) // 2``), zero-padded
    where that window runs off either end of ``x``.

    Label handling (still a special case, see the TODOs):

    * when ``y[i]`` is ``'!ENTER[2]'`` and the previous label is not, the
      function toggles between ``x`` and a private deep copy of it and
      zeroes the ``ba`` frames before ``i`` in the now-active buffer;
    * when ``y[i]`` contains ``'!EXIT'`` but ``y[i + ba]`` does not, frames
      ``i + ba .. i + 2 * ba`` of the active buffer are zeroed.

    Parameters
    ----------
    nframes : int
        Total number of frames per output row.
    x : 2-D numpy array, indexed ``[frame, feature]``
        Input features.  NOTE: zeroed **in place** whenever ``x`` is the
        active buffer.
    y : 1-D numpy array of str
        One label per frame (``y.shape[0]`` and ``y[i]`` are used).

    Returns
    -------
    numpy.ndarray of float64, shape ``(x.shape[0], nframes * x.shape[1])``.
    """
    # Floor division keeps ``ba`` an int on Python 3 as well (slice bound).
    ba = (nframes - 1) // 2  # frames of context before / after
    x2 = copy.deepcopy(x)
    on_x2 = False
    n_frames, width = x.shape[0], x.shape[1]
    x_f = np.zeros((n_frames, nframes * width), dtype='float64')
    print('x_f shape: %s' % (x_f.shape,))
    for i in range(n_frames):
        # NOTE(review): at i == 0, y[i - 1] is y[-1] (the last label), and
        # for i < ba the slice start i - ba is negative - confirm intended.
        if y[i] == '!ENTER[2]' and y[i - 1] != '!ENTER[2]':  # TODO general case
            on_x2 = not on_x2
            active = x2 if on_x2 else x
            active[i - ba:i, :] = 0.0
        if (i + ba < y.shape[0] and '!EXIT' in y[i]
                and '!EXIT' not in y[i + ba]):  # TODO general
            active = x2 if on_x2 else x
            active[i + ba:i + 2 * ba + 1, :] = 0.0
        source = x2 if on_x2 else x
        window = source[max(0, i - ba):i + ba + 1].flatten()
        missing_before = max(0, (ba - i) * width)
        missing_after = max(0, ((i + ba + 1) - n_frames) * width)
        x_f[i] = np.pad(window, (missing_before, missing_after),
                        'constant', constant_values=(0, 0))
    return x_f
def conv_backward_naive(dout, cache):
    """
    A naive implementation of the backward pass for a convolutional layer.

    Inputs:
    - dout: Upstream derivatives, indexed [image, filter, out_row, out_col].
    - cache: A tuple of (x, w, b, conv_param) as in conv_forward_naive;
      conv_param provides the integer entries 'stride' and 'pad'.

    Returns a tuple of:
    - dx: Gradient with respect to x (float64, same shape as x)
    - dw: Gradient with respect to w (float64, same shape as w)
    - db: Gradient with respect to b (dout summed over every axis but the
      filter axis)
    """
    x, w, _, conv_param = cache
    stride = conv_param['stride']
    pad = conv_param['pad']
    N, _, H, W = x.shape
    _, _, HH, WW = w.shape
    # Floor division: the output extents are used as loop bounds.
    Hp = 1 + (H + 2 * pad - HH) // stride
    Wp = 1 + (W + 2 * pad - WW) // stride

    spatial_pad = ((0, 0), (pad, pad), (pad, pad))
    dx = np.zeros(x.shape)
    dw = np.zeros(w.shape)

    for i in range(N):
        data = np.pad(x[i], spatial_pad, 'constant')
        padded_dxi = np.zeros(data.shape)
        for s in range(Hp):
            top = s * stride
            for p in range(Wp):
                left = p * stride
                grad = dout[i, :, s, p]  # one upstream value per filter
                window = data[:, top:top + HH, left:left + WW]
                dw += np.einsum('i,jkl->ijkl', grad, window)
                # sum_f w[f] * dout[i, f, s, p], scattered back onto the
                # receptive field of this output position.
                padded_dxi[:, top:top + HH, left:left + WW] += \
                    np.tensordot(grad, w, axes=(0, 0))
        # Explicit end indices: ``pad:-pad`` is an empty slice when pad == 0.
        dx[i] = padded_dxi[:, pad:pad + H, pad:pad + W]

    db = np.einsum('ijkl->j', dout)
    return dx, dw, db
def _2d_filter(mat, win2d, matsign, pad): """ Filtering an image using a 2D window. Parameters ---------- mat : 2D array of floats nrow : int Height of the window. ncol: int Width of the window. sigma: tuple of 2 floats Sigmas of the window. pad : int Padding. Returns ------- Filtered image. """ matpad = np.pad(mat, ((0, 0), (pad, pad)), mode='edge') matpad = np.pad(matpad, ((pad, pad), (0, 0)), mode='mean') (nrow, ncol) = matpad.shape matfilter = np.real(ifft2(fft2(matpad * matsign) * win2d) * matsign) return matfilter[pad:nrow - pad, pad:ncol - pad]
def eval(self, data, label, lens):
    """Evaluate the model on ``data`` and return the accuracy in percent.

    The rows are fed to ``self.sess`` in chunks of ``self.batch_size``.
    A trailing partial chunk is zero-padded up to a full batch and only
    the results of its real rows are kept.  The predicted classes
    (argmax over ``self.logits``) are printed before returning.

    Parameters
    ----------
    data, label : 2-D numpy arrays with one row per example.
    lens : 1-D numpy array of per-example lengths, or None when the
        graph has no ``self.lengths`` input to feed.

    Returns
    -------
    float : ``100 * sum(correct) / number_of_examples``.

    Raises
    ------
    ZeroDivisionError
        If ``data`` has no rows.
    """
    batch_size = self.batch_size
    total = data.shape[0]
    # Build the argmax op once instead of adding a new one per batch.
    predicted_class = tf.argmax(self.logits, 1)
    predictions = []
    vals = []

    def run_batch(D, L, l, keep):
        """Run one full-size batch and keep the first ``keep`` results."""
        feed_dict = {self.dataset: D, self.labels: L}
        if l is not None:
            feed_dict[self.lengths] = l
        correct, classes = self.sess.run(
            [self.correct_prediction, predicted_class], feed_dict)
        predictions.extend(correct[:keep])
        vals.extend(classes[:keep])

    # Floor division keeps the batch arithmetic integral on Python 3 and,
    # unlike the loop variable used before, is defined even when there are
    # fewer rows than one batch.
    last_chunk = (total // batch_size) * batch_size
    for start in range(0, last_chunk, batch_size):
        stop = start + batch_size
        run_batch(data[start:stop], label[start:stop],
                  None if lens is None else lens[start:stop], batch_size)

    ## DO THE EXTRA
    remainder = total - last_chunk
    if remainder:  # nothing to pad when the data fills whole batches
        gap = batch_size - remainder
        D = np.pad(data[last_chunk:], ((0, gap), (0, 0)),
                   mode='constant', constant_values=0)
        L = np.pad(label[last_chunk:], ((0, gap), (0, 0)),
                   mode='constant', constant_values=0)
        l = None
        if lens is not None:
            l = np.pad(lens[last_chunk:], (0, gap),
                       mode='constant', constant_values=0)
        run_batch(D, L, l, remainder)

    print(vals)  ## PRINT THE PREDICTIONS
    return 100.0 * sum(predictions) / len(predictions)
def padding(nframes, x, y):
    """ Dirty hacky padding for a minimum of nframes.

    Row ``i`` of the result is the concatenation of frames
    ``i - b_a .. i + b_a`` of the input (``b_a = (nframes - 1) // 2``),
    zero-padded where that window runs off either end.

    Label handling (still a special case, see the TODOs): an
    ``'!ENTER[2]'`` label that does not follow another ``'!ENTER[2]'``
    toggles between ``x`` and a private deep copy of it and zeroes the
    ``b_a`` frames before it in the now-active buffer; a label containing
    ``'!EXIT'`` whose ``b_a``-th successor does not contain it zeroes
    frames ``i + b_a .. i + 2 * b_a`` of the active buffer.

    ``x`` (2-D, indexed ``[frame, feature]``) is modified **in place**
    whenever it is the active buffer.  ``y`` is a 1-D numpy array with one
    label per frame.  Returns a float32 array of shape
    ``(x.shape[0], nframes * x.shape[1])``.
    """
    # Floor division keeps b_a an int on Python 3 as well (slice bound).
    b_a = (nframes - 1) // 2  # before // after
    x_2 = copy.deepcopy(x)
    on_x_2 = False
    n_rows, n_cols = x.shape[0], x.shape[1]
    x_f = zeros((n_rows, nframes * n_cols), dtype='float32')
    for i in range(n_rows):
        # NOTE(review): at i == 0, y[i - 1] is y[-1] (the last label), and
        # for i < b_a the slice start i - b_a is negative - confirm intended.
        if y[i] == '!ENTER[2]' and y[i - 1] != '!ENTER[2]':  # TODO general case
            on_x_2 = not on_x_2
            buf = x_2 if on_x_2 else x
            buf[i - b_a:i, :] = 0.0
        if (i + b_a < y.shape[0] and '!EXIT' in y[i]
                and '!EXIT' not in y[i + b_a]):  # TODO general case
            buf = x_2 if on_x_2 else x
            buf[i + b_a:i + 2 * b_a + 1, :] = 0.0
        buf = x_2 if on_x_2 else x
        lead = max(0, (b_a - i) * n_cols)
        trail = max(0, ((i + b_a + 1) - n_rows) * n_cols)
        x_f[i] = pad(buf[max(0, i - b_a):i + b_a + 1].flatten(),
                     (lead, trail), 'constant', constant_values=(0, 0))
    return x_f
def test_offset():
    """find_offset must recover integer shifts along each axis and both.

    A smooth test surface is shifted one sample at a time (zeros are
    shifted in, the far edge is dropped) along the second axis, the first
    axis, and then both; after ``i`` shifts the reported offset must be
    ``i`` in one argument order and ``-i`` in the other.
    """
    X, Y = np.mgrid[-5:5:0.05, -5:5:0.05]
    Z = np.sqrt(X**2 + Y**2) + np.sin(X**2 + Y**2)
    rows, cols = Z.shape

    # (shift per step along axis 0, shift per step along axis 1)
    for row_step, col_step in ((0, 1), (1, 0), (1, 1)):
        Z2 = Z.copy()
        for i in range(15):
            dx, dy = lmi.find_offset(Z, Z2)
            dx2, dy2 = lmi.find_offset(Z2, Z)
            assert_array_equal([dx, dx2], [i * row_step, -i * row_step])
            assert_array_equal([dy, dy2], [i * col_step, -i * col_step])
            shifted = np.pad(Z2, ((row_step, 0), (col_step, 0)),
                             mode='constant')
            # Crop back to the original extent, dropping the far edge.
            Z2 = shifted[:rows, :cols]
def SMeval(self, DWi, DU, Dlens, DWj, keep_predictions=False):
    """
    Runs eval on dev/test data with the option to return predictions or
    performance.

    The examples are fed to ``self.sess`` in chunks of ``self.batch_size``;
    a trailing partial chunk is zero-padded to a full batch and only the
    results of its real examples are kept.

    Parameters
    ----------
    DWi, DWj, DU, Dlens : numpy arrays with one entry per example, fed to
        ``self.cur_world``, ``self.next_world``, ``self.inputs`` and
        ``self.lengths``.  The padding applied here treats ``DWi`` as 4-D,
        ``DWj`` and ``DU`` as 2-D and ``Dlens`` as 1-D.
    keep_predictions : bool
        When true, return the list of predicted classes (argmax over
        ``self.logits``); otherwise return the accuracy in percent
        computed from ``self.correct_prediction``.

    Raises
    ------
    ZeroDivisionError
        If there are no examples and ``keep_predictions`` is false.
    """
    batch_size = self.batch_size
    total = len(DWi)
    # Pick (and, for argmax, build) the fetched op once, not per batch.
    if keep_predictions:
        fetch = tf.argmax(self.logits, 1)
    else:
        fetch = self.correct_prediction
    predictions = []

    def run_batch(wi, wj, U, lens, keep):
        """Run one full-size batch and keep the first ``keep`` results."""
        feed_dict = {self.cur_world: wi, self.next_world: wj,
                     self.inputs: U, self.lengths: lens}
        predictions.extend(self.sess.run(fetch, feed_dict)[:keep])

    # Floor division keeps the batch arithmetic integral on Python 3 and is
    # defined even when there are fewer examples than one batch (the loop
    # variable previously used here was not).
    last_chunk = (total // batch_size) * batch_size
    for start in range(0, last_chunk, batch_size):
        stop = start + batch_size
        run_batch(DWi[start:stop], DWj[start:stop], DU[start:stop],
                  Dlens[start:stop], batch_size)

    ## Grab the extras
    remainder = total - last_chunk
    if remainder:  # nothing to pad when the data fills whole batches
        gap = batch_size - remainder
        wi = np.pad(DWi[last_chunk:], ((0, gap), (0, 0), (0, 0), (0, 0)),
                    mode='constant', constant_values=0)
        wj = np.pad(DWj[last_chunk:], ((0, gap), (0, 0)),
                    mode='constant', constant_values=0)
        U = np.pad(DU[last_chunk:], ((0, gap), (0, 0)),
                   mode='constant', constant_values=0)
        lens = np.pad(Dlens[last_chunk:], (0, gap),
                      mode='constant', constant_values=0)
        run_batch(wi, wj, U, lens, remainder)

    if keep_predictions:
        return predictions
    return 100.0 * sum(predictions) / len(predictions)
def test_uninterpolated_nan_regions(boundary, normalize_kernel):
    """NaN interpolation of contiguous NaN regions versus kernel size (#8086).

    A NaN block as large as the kernel must leave NaNs in the result (and
    warn); a NaN block one element smaller per axis must be fully
    interpolated away.
    """
    kernel = Gaussian2DKernel(1, 5, 5)
    margin = kernel.shape[0] * 2

    def ones_around_nan_block(block_shape):
        """Image of ones with a central block of NaNs of ``block_shape``."""
        nan_block = np.full(block_shape, np.nan)
        return np.pad(nan_block, pad_width=margin, mode='constant',
                      constant_values=1)

    # Test case: kernel.shape == NaN_region.shape
    image = ones_around_nan_block(kernel.shape)
    expected_warning = (
        "nan_treatment='interpolate', however, NaN values detected "
        "post convolution. A contiguous region of NaN values, larger "
        "than the kernel size, are present in the input array. "
        "Increase the kernel size to avoid this.")
    with pytest.warns(AstropyUserWarning, match=expected_warning):
        result = convolve(image, kernel, boundary=boundary,
                          nan_treatment='interpolate',
                          normalize_kernel=normalize_kernel)
        assert np.isnan(result).any()

    # Test case: kernel.shape > NaN_region.shape (block is 1 smaller than
    # the kernel along each axis)
    image = ones_around_nan_block((kernel.shape[0] - 1, kernel.shape[1] - 1))
    result = convolve(image, kernel, boundary=boundary,
                      nan_treatment='interpolate',
                      normalize_kernel=normalize_kernel)
    assert not np.isnan(result).any()  # Note: negation
def tile_images_make_tiles(data, padsize=1, padval=0, width=None, highlights = None):
    """Arrange a stack of images into one tiled image with borders.

    Parameters
    ----------
    data : numpy array, first three axes (image, row, column)
        Images to tile.  The colour ``padval`` and ``highlights`` paths
        index a fourth (channel) axis.
    padsize : int
        Border, in pixels, added on every side of each image.
    padval : scalar or sequence of length 1 (grayscale) or 3 (colour)
        Border value.
    width : int or None
        Desired number of tiles per row, forwarded to
        ``get_tiles_height_width``.
    highlights : sequence or None
        One entry per image; a non-None entry is written over that
        image's border.

    Returns
    -------
    ((height, width), tiled) : the tile-grid size and the tiled image,
    with the trailing ``padsize`` rows and columns removed.
    """
    height, width = get_tiles_height_width(data.shape[0], desired_width=width)

    def fill_borders(target, axis_pads, value):
        """Write ``value`` into the padded margins of ``target``.

        ``axis_pads`` holds one (before, after) pair per leading axis.
        """
        for axis, (before, after) in enumerate(axis_pads):
            index = [slice(None)] * target.ndim
            if before > 0:
                index[axis] = slice(None, before)
                target[tuple(index)] = value
            if after > 0:
                index[axis] = slice(-after, None)
                target[tuple(index)] = value

    # Two-way padding with highlights
    if highlights is not None:
        assert len(highlights) == data.shape[0]
    padding = (((0, width * height - data.shape[0]),
                (padsize, padsize), (padsize, padsize))
               + ((0, 0),) * (data.ndim - 3))

    # First pad with constant vals
    try:
        len(padval)
    except TypeError:  # scalar pad value
        padval = (padval,)
    assert len(padval) in (1, 3), 'padval should be grayscale (len 1) or color (len 3)'
    if len(padval) == 1:
        # Pass the scalar itself: a pair of 1-tuples has shape (2, 1),
        # which np.pad cannot broadcast to (ndim, 2) for 3-D/4-D data.
        data = np.pad(data, padding, mode='constant',
                      constant_values=padval[0])
    else:
        data = np.pad(data, padding, mode='constant', constant_values=(0, 0))
        for cc in range(3):
            # Replace 0s with proper color in each channel
            fill_borders(data[..., cc], padding[:3], padval[cc])

    if highlights is not None:
        # Then highlight if necessary
        for ii, highlight in enumerate(highlights):
            if highlight is not None:
                fill_borders(data[ii], padding[1:3], highlight)

    # tile the filters into an image
    tiled = data.reshape((height, width) + data.shape[1:])
    tiled = tiled.transpose((0, 2, 1, 3) + tuple(range(4, tiled.ndim)))
    tiled = tiled.reshape((height * tiled.shape[1], width * tiled.shape[3])
                          + tiled.shape[4:])
    if padsize > 0:
        # remove excess padding (``0:-0`` would drop everything)
        tiled = tiled[:-padsize, :-padsize]

    return (height, width), tiled
def pad(x, nf, ma=0):
    """ pad x for nf frames with margin ma.

    ``x`` is a 2-D array indexed ``[frame, feature]``.  Each output row is
    ``nf`` consecutive frames flattened into one vector, so the result has
    ``x.shape[1] * nf`` columns and dtype ``theano.config.floatX``.

    * ``ma == 0``: one row per frame ``j`` holding frames
      ``j - ba .. j + ba`` (``ba = (nf - 1) // 2``), zero-padded where the
      window leaves ``x``.
    * ``ma != 0``: ``x.shape[0] - 2 * ma`` rows.  When ``ba <= ma`` row
      ``j`` is frames ``j .. j + 2 * ma``; otherwise the zero-padded
      window around ``j`` is used as above.
    """
    # Floor division keeps ``ba`` usable as a slice bound on Python 3.
    ba = (nf - 1) // 2  # before/after
    n_frames, width = x.shape[0], x.shape[1]

    def context_window(j):
        """Frames j-ba..j+ba flattened, zero-padded at the edges of x."""
        return numpy.pad(
            x[max(0, j - ba):j + ba + 1].flatten(),
            (max(0, (ba - j) * width),
             max(0, ((j + ba + 1) - n_frames) * width)),
            "constant",
            constant_values=(0, 0),
        )

    if ma:
        ret = numpy.zeros((n_frames - 2 * ma, width * nf),
                          dtype=theano.config.floatX)
        if ba <= ma:
            # NOTE(review): this copies 2 * ma + 1 frames into a row sized
            # for nf frames, so it only fits when 2 * ma + 1 == nf.
            for j in range(ret.shape[0]):
                ret[j] = x[j:j + 2 * ma + 1].flatten()
        else:
            for j in range(ret.shape[0]):
                ret[j] = context_window(j)
        return ret

    ret = numpy.zeros((n_frames, width * nf), dtype=theano.config.floatX)
    for j in range(n_frames):
        ret[j] = context_window(j)
    return ret
def paddingAnswers(answerSheet1, blankSheet1):
    """Convert both sheets to grayscale and pad their columns.

    Each sheet gets ``abs(rows - cols)`` extra columns in total, split
    between the left and right sides with the odd column going to the
    right, filled with the maximum value found in the (grayscale) blank
    sheet.  Rows are never padded.

    Parameters
    ----------
    answerSheet1, blankSheet1 : images with 1 or 3 bands, as reported by
        ``ipcv.dimensions`` (presumably ``(rows, cols, bands, dtype)`` -
        confirm against that helper).

    Returns
    -------
    (answerSheet, blankSheet) : the padded grayscale images.

    Raises
    ------
    ValueError
        If a sheet has neither 1 nor 3 bands.
    """
    numRowsA, numColsA, numBandsA, dataTypeA = ipcv.dimensions(answerSheet1)
    numRowsB, numColsB, numBandsB, dataTypeB = ipcv.dimensions(blankSheet1)
    print('%s %s' % (numRowsB, numColsB))

    def to_gray(sheet, numBands):
        """Return ``sheet`` as a single-band image."""
        if numBands == 3:
            return cv2.cvtColor(sheet, cv.CV_BGR2GRAY)
        if numBands == 1:
            return sheet
        raise ValueError('expected 1 or 3 bands, got %s' % (numBands,))

    def pad_columns(sheet, numRows, numCols, fill):
        """Add abs(numRows - numCols) columns of ``fill``, split left/right."""
        diff = abs(int(numRows) - int(numCols))
        # Integer widths: np.pad rejects (or mis-rounds) float pad widths.
        left = diff // 2
        right = diff - left  # one more than ``left`` when diff is odd
        return numpy.pad(sheet, ((0, 0), (left, right)), 'constant',
                         constant_values=fill)

    answerSheet = to_gray(answerSheet1, numBandsA)
    blankSheet = to_gray(blankSheet1, numBandsB)

    # Both sheets are filled with the blank sheet's brightest value.
    maxCount = numpy.max(blankSheet)
    answerSheet = pad_columns(answerSheet, numRowsA, numColsA, maxCount)
    # The blank sheet is padded from its own dimensions; previously its
    # even-difference case was gated on the answer sheet's dimensions.
    blankSheet = pad_columns(blankSheet, numRowsB, numColsB, maxCount)
    return answerSheet, blankSheet
def qea(im):
    """Return amplitude, phase and three frequency estimates of a 3-D array.

    ``H = im + 1j * ss.hilbert(im, axis=2)`` is formed first (``ss`` is
    presumably ``scipy.signal`` - confirm against the file's imports).
    Along each axis a frequency estimate is computed from three
    neighbouring samples as ``|arccos((next + prev) / (2 * mid))| / pi / 2``
    and reflect-padded back to the input extent.  Only the axis-0 estimate
    carries a sign, taken from ``real((prev - next) / (2j * mid))``.

    Returns
    -------
    (ia, ip, ifRow, ifCol, ifTime) : ``abs(H)``, ``angle(H)`` and the
    estimates along axis 1, axis 0 and axis 2 respectively.
    """
    analytic = im + 1j * ss.hilbert(im, axis=2)
    ia = np.abs(analytic)
    ip = np.angle(analytic)

    def neighbours(axis):
        """Views of ``analytic`` at offsets 0, 1 and 2 along ``axis``."""
        def shifted(start, stop):
            index = [slice(None)] * 3
            index[axis] = slice(start, stop)
            return analytic[tuple(index)]
        return shifted(None, -2), shifted(1, -1), shifted(2, None)

    def restore_extent(values, axis):
        """Reflect-pad one sample on both ends of ``axis``."""
        widths = [(0, 0)] * 3
        widths[axis] = (1, 1)
        return np.pad(values, widths, mode='reflect')

    def magnitude(axis):
        """Unsigned three-point estimate along ``axis`` (not yet scaled)."""
        prev, mid, nxt = neighbours(axis)
        return np.abs(np.arccos((nxt + prev) / (2 * mid)))

    prev, mid, nxt = neighbours(0)
    col_sign = np.sign(np.real((prev - nxt) / (2j * mid)))
    ifCol = restore_extent((magnitude(0) * col_sign) / np.pi / 2, 0)
    ifRow = restore_extent(magnitude(1) / np.pi / 2, 1)
    ifTime = restore_extent(magnitude(2) / np.pi / 2, 2)
    return (ia, ip, ifRow, ifCol, ifTime)
def deepflow2( im1=None, im2=None, match=None, options=""):
    """
    flow = deepflow2.deepflow2(image1, image2, match=None, options='')
    Compute the flow between two images, eventually using given matches.
    Images must be HxWx3 numpy arrays (convert to float32).
    Match is an optional numpy array argument (None by default, ie no input match),
    where each row starts by x1 y1 x2 y2.
    Options is an optional string argument ('' by default), to set the options.
    Type deepflow2() to see the list of available options.
    The function returns the optical flow as a HxWx2 numpy array.
    When either image is missing, the usage text is shown and None is
    returned.
    """
    # Identity checks: ``None in (im1, im2)`` compares None to the arrays
    # element-wise, which raises for real images on current numpy.
    if im1 is None or im2 is None:
        usage_python()
        return

    # convert images
    assert im1.shape == im2.shape, "images must have the same shape"
    if im1.dtype != float32:
        im1 = im1.astype(float32)
    if im2.dtype != float32:
        im2 = im2.astype(float32)
    h, w, nchannels = im1.shape
    assert nchannels==3, "images must have 3 channels"

    # Channel-first layout with the width rounded up to a multiple of 4.
    stride = 4*((w+3)//4)
    im1 = pad( rollaxis(im1,2), ((0,0),(0,0),(0, stride-w)), 'constant')
    im2 = pad( rollaxis(im2,2), ((0,0),(0,0),(0, stride-w)), 'constant')

    # allocate flow
    flowx = empty((h,stride), dtype=float32)
    flowy = empty((h,stride), dtype=float32)

    # compute flow
    if match is not None:
        assert match.shape[1]>=4
        match = ascontiguousarray(match[:,:4], dtype=float32)
    deepflow2_numpy( w, flowx, flowy, im1, im2, match, options)

    # Drop the alignment columns again and stack x/y as the last axis.
    return concatenate ( (flowx[:,:w,None], flowy[:,:w,None]), axis=2)
def _zero_pad_to_same_size(a, b): [ay, ax], [by, bx] = a.shape, b.shape if ax < bx or ay < by: a = np.pad(a, ( (by-ay,0),(bx-ax,0) ), mode='constant') elif ax > bx or ay > by: b = np.pad(b, ( (ay-by,0),(ax-bx,0) ), mode='constant') return a, b, [ax-bx, ay-by]
def conv_backward_naive(dout, cache):
    """
    A naive implementation of the backward pass for a convolutional layer.

    Inputs:
    - dout: Upstream derivatives, indexed [image, filter, out_row, out_col].
    - cache: A tuple of (x, w, b, conv_param) as in conv_forward_naive;
      conv_param provides the integer entries 'stride' and 'pad'.

    Returns a tuple of:
    - dx: Gradient with respect to x
    - dw: Gradient with respect to w
    - db: Gradient with respect to b
    """
    # Setting up
    x, w, b, conv_param = cache
    stride = conv_param['stride']
    pad = conv_param['pad']
    N, _, H, W = x.shape
    _, _, HH, WW = w.shape
    # Floor division: the output extents are used as loop bounds.
    h_out = 1 + (H + 2 * pad - HH) // stride
    w_out = 1 + (W + 2 * pad - WW) // stride

    dw = np.zeros_like(w)
    db = np.zeros_like(b)

    # Padded input, and a zeroed gradient buffer of the same padded shape.
    x_padded = np.pad(x, [(0, 0), (0, 0), (pad, pad), (pad, pad)],
                      mode='constant')
    dx_padded = np.zeros_like(x_padded)

    for image in range(N):
        for height in range(h_out):
            row = height * stride
            for width in range(w_out):
                col = width * stride
                # Upstream gradient of every filter at this output position.
                grad = dout[image, :, height, width]
                conv_window = x_padded[image, :, row:row + HH, col:col + WW]
                # Each filter's gradient is its upstream value times the
                # input window it saw.
                dw += grad[:, None, None, None] * conv_window
                # Each filter pushes its weights, scaled by its upstream
                # value, back onto the window.
                dx_padded[image, :, row:row + HH, col:col + WW] += \
                    np.tensordot(grad, w, axes=(0, 0))

    db += dout.sum(axis=(0, 2, 3))

    # And remove the padding
    dx = dx_padded[:, :, pad:pad + H, pad:pad + W]
    return dx, dw, db
def _DataToInputs(self, spec, labels, weighted_labels, length, filename, truncated_length): # This method re-implements a portion of the TensorFlow graph using numpy. # While typically it is frowned upon to test complicated code with other # code, there is no way around this for testing the pipeline end to end, # which requires an actual spec computation. Furthermore, much of the # complexity of the pipeline is due to the TensorFlow implementation, # so comparing it against simpler numpy code still provides effective # coverage. truncated_length = (min(truncated_length, length) if truncated_length else length) # Pad or slice spec if differs from truncated_length. if len(spec) < truncated_length: pad_amt = truncated_length - len(spec) spec = np.pad(spec, [(0, pad_amt), (0, 0)], 'constant') else: spec = spec[0:truncated_length] # Pad or slice labels if differs from truncated_length. if len(labels) < truncated_length: pad_amt = truncated_length - len(labels) labels = np.pad(labels, [(0, pad_amt), (0, 0)], 'constant') else: labels = labels[0:truncated_length] inputs = [[spec, labels, truncated_length, filename]] return inputs
def zero_padding():
    """Plot a sine, its zero-padded variants and their rFFT magnitudes.

    Left column of the 3x2 grid: the 77-sample sine, the sine followed by
    23 zeros, and the Hann-windowed sine followed by 23 zeros.  Right
    column: the corresponding rFFT magnitudes (DC bin dropped, scaled by
    ``2 / samples``).  The figure is exported to 'myfile.tikz' and shown.
    """
    samples = 77
    rec_period = 4
    pad_count = 23
    # NOTE(review): with two ints this is 19 on Python 2 and 19.25 on
    # Python 3 - confirm which sampling rate is intended.
    sampling_rate = samples/rec_period

    time = np.linspace(0, rec_period, samples)
    sin = np.sin(time*3.75*np.pi)
    win = np.hanning(len(sin))
    padded_sin = np.pad(sin, (0, pad_count), 'constant')
    padded_len = samples + pad_count

    def spectrum(signal):
        """Frequency bins and scaled magnitudes without the DC bin."""
        freqs = (np.fft.rfftfreq(len(signal))*sampling_rate)[1:]
        magnitude = np.abs(np.fft.rfft(signal))[1:]*2/samples
        return freqs, magnitude

    def padded_time_axis():
        """Time axis covering the original signal plus the padding."""
        return np.linspace(0, padded_len*rec_period/float(samples), padded_len)

    plt.subplot(321)
    plt.plot(time, sin)

    plt.subplot(322)
    freqs, magnitude = spectrum(sin)
    plt.plot(freqs, magnitude, "o")

    plt.subplot(323)
    plt.plot(padded_time_axis(), padded_sin)

    plt.subplot(324)
    freqs, magnitude = spectrum(padded_sin)
    plt.plot(freqs, magnitude, "o")

    plt.subplot(325)
    padded_sin_win = np.pad(sin*win, (0, pad_count), 'constant')
    plt.plot(padded_time_axis(), padded_sin_win)

    plt.subplot(326)
    freqs, magnitude = spectrum(padded_sin_win)
    plt.plot(freqs, magnitude, "o")

    matplotlib2tikz.save( 'myfile.tikz' )
    plt.show()
def do(self, s, sigma, aArray, cArray):
    """Masked Gaussian smoothing of a 3-D array.

    Every voxel whose entry in ``cArray`` is truthy is replaced by the
    Gaussian-weighted mean of the *original* values of the truthy voxels
    inside the ``(2c + 1)``-wide cube around it (``c = s // 2``).  Voxels
    whose ``cArray`` entry is falsy are returned unchanged.

    Parameters
    ----------
    s : int
        Filter size.  The window is ``2 * (s // 2) + 1`` wide while the
        weight grid is ``s`` wide, so ``s`` is expected to be odd.
    sigma : float
        Width of the Gaussian weights.
    aArray : 3-D array
        Values to smooth (not modified).
    cArray : 3-D array of booleans, same shape as ``aArray``
        Selects the voxels that are smoothed and that contribute.

    Returns
    -------
    Array of the same shape as ``aArray``.

    The weight grid is printed to stdout, as before.
    """
    # center of filter (in all directions); floor division keeps it usable
    # as a pad width and index on Python 3
    c = s // 2

    # extend aArray and cArray
    widths = ((c, c), (c, c), (c, c))
    aaArray = np.pad(aArray, widths, 'edge')
    ccArray = np.pad(cArray, widths, 'constant', constant_values=False)

    # gaussian weights centred on voxel (c, c, c)
    x, y, z = np.mgrid[0:s, 0:s, 0:s]
    center = float(c)
    filt = np.exp(-(((center - x) / sigma) ** 2
                    + ((center - y) / sigma) ** 2
                    + ((center - z) / sigma) ** 2) / 2)
    print(filt)

    # Real copy: a ``[:, :, :]`` view would make later windows read values
    # that this loop has already overwritten.
    dArray = aaArray.copy()
    Ashape = aaArray.shape
    for i in range(c, Ashape[0] - c):
        for j in range(c, Ashape[1] - c):
            for k in range(c, Ashape[2] - c):
                if not ccArray[i, j, k]:
                    continue
                tempA = aaArray[(i - c):(i + c + 1),
                                (j - c):(j + c + 1),
                                (k - c):(k + c + 1)]
                tempC = ccArray[(i - c):(i + c + 1),
                                (j - c):(j + c + 1),
                                (k - c):(k + c + 1)]
                mask = np.where(tempC)
                weights = filt[mask]
                dArray[i, j, k] = np.sum(tempA[mask] * weights) / np.sum(weights)

    # Explicit end indices: ``c:-c`` is an empty slice when c == 0.
    return dArray[c:Ashape[0] - c, c:Ashape[1] - c, c:Ashape[2] - c]
def _extend_data_to_include(self, url, url2): index_1, index_2 = self.get_indexes(url, url2) missing = index_1 < 0 or index_2 < 0 if missing: none_index = self.get_index(None) if none_index >= len(self.known_urls) - 2: #extend per 500 for performance self.known_urls = self.known_urls + [None]*500 if index_1 < 0: self.known_urls[none_index] = url none_index += 1 if index_2 < 0: self.known_urls[none_index] = url2 none_index += 1 size = len(self.known_urls) padding = size - self.click_matrix.shape[0] self.click_matrix = \ np.matrix(np.pad(self.click_matrix, pad_width=([0,padding], [0,padding]), mode='constant')) padding = size - len(self.spend_time) self.spend_time = np.pad(self.spend_time, pad_width=(0, padding), mode='constant')
def __init__(self,h5_path,image_paths,max_q=None,max_mc=None):
    """Load the question/answer arrays from an HDF5 file.

    Parameters
    ----------
    h5_path : str
        HDF5 file providing the datasets 'image_ids', 'questions',
        'multiple_choice', 'answers', 'img_list' and 'bounding_boxes'.
        The file is opened read-only and kept open as ``self.h5``.
    image_paths
        Stored unchanged as ``self.image_paths``.
    max_q : int or None
        When truthy, questions are truncated, or right-padded with
        ``a_w2i['</s>']``, along their second axis to this length.
    max_mc : int or None
        When truthy, the multiple-choice array is truncated or padded the
        same way along its last axis.
    """
    self.h5 = h5py.File(h5_path, mode='r')
    # ``dataset[()]`` reads the whole dataset; the older ``.value`` alias
    # is no longer available in h5py 3.
    self.image_ids = self.h5['image_ids'][()]
    self.questions = self.h5['questions'][()]
    self.multiple_choice = self.h5['multiple_choice'][()]
    self.answers = self.h5['answers'][()]
    self.bounding_boxes = dict(zip(self.h5['img_list'][()],
                                   self.h5['bounding_boxes'][()]))
    self.N = len(self.image_ids)

    if max_q:
        if max_q < self.questions.shape[1]:
            self.questions = self.questions[:, :max_q]
        else:
            self.questions = np.pad(
                self.questions,
                ((0, 0), (0, max_q - self.questions.shape[-1])),
                'constant', constant_values=a_w2i['</s>'])

    if max_mc:
        if max_mc < self.multiple_choice.shape[-1]:
            # Truncate the last axis.  Indexing with a bare ``max_mc``
            # picked a single position and dropped the axis instead.
            self.multiple_choice = self.multiple_choice[:, :, :max_mc]
        else:
            self.multiple_choice = np.pad(
                self.multiple_choice,
                ((0, 0), (0, 0), (0, max_mc - self.multiple_choice.shape[-1])),
                'constant', constant_values=a_w2i['</s>'])

    self.max_q = self.questions.shape[1]
    self.indexes = np.arange(self.N)
    self.image_paths = image_paths
def get_input_matrices(batch_data):
    """Turn a batch into zero-padded arrays plus their length masks.

    ``process(batch_data)`` supplies the premise sequences, hypothesis
    sequences and labels.  Every sequence (a 2-D array, first axis = time)
    is zero-padded along its first axis to the longest sequence of its
    group, and a ``(batch, max_length)`` mask marks the real time steps
    with 1.

    Returns
    -------
    X_prem, X_prem_mask, X_hypo, X_hypo_mask, y
    """
    X_prem, X_hypo, y = process(batch_data)
    batch_size = len(X_prem)

    def pad_and_mask(sentences):
        """Pad ``sentences`` in place; return (stacked array, mask)."""
        longest = max(len(entry) for entry in sentences)
        # Mask is used in Lasagne LSTM layer
        mask = np.zeros((batch_size, longest))
        for i in range(batch_size):
            length = len(sentences[i])
            mask[i, :length] = 1
            sentences[i] = np.pad(sentences[i],
                                  [(0, longest - length), (0, 0)],
                                  'constant')
        return np.asarray(sentences), mask

    X_prem, X_prem_mask = pad_and_mask(X_prem)
    X_hypo, X_hypo_mask = pad_and_mask(X_hypo)
    y = np.asarray(y)
    return X_prem, X_prem_mask, X_hypo, X_hypo_mask, y
def saveSlidingWindows(args):
    """Cut an image into sliding-window crops and save each as a PNG.

    Parameters
    ----------
    args : tuple ``(im_path, filter_size, step_size, out_file_pre, idx)``
        Passed as ONE tuple, as before (tuple parameters in the signature
        itself are Python 2-only syntax).  ``filter_size`` is
        ``(rows, cols)`` of a crop, ``step_size`` the integer stride used
        along both axes, ``out_file_pre`` the output path prefix and
        ``idx`` a job index that is only printed.

    The image is edge-padded by the amounts ``getPadTuple`` returns for
    each axis, then a crop is taken at every multiple of ``step_size``
    along both axes and written to
    ``<out_file_pre>_<row index>_<col index>.png``.

    Returns
    -------
    list of str : the written file names, in row-major order.
    """
    im_path, filter_size, step_size, out_file_pre, idx = args
    print(idx)

    im = scipy.misc.imread(im_path)
    pad_r = getPadTuple(im.shape[0], filter_size[0], step_size)
    pad_c = getPadTuple(im.shape[1], filter_size[1], step_size)
    if len(im.shape) > 2:
        pad_width = (pad_r, pad_c, (0, 0))  # leave the channel axis alone
    else:
        pad_width = (pad_r, pad_c)
    im = np.pad(im, pad_width, 'edge')

    out_files = []
    for idx_r, start_r in enumerate(range(0, im.shape[0], step_size)):
        end_r = start_r + filter_size[0]
        for idx_c, start_c in enumerate(range(0, im.shape[1], step_size)):
            end_c = start_c + filter_size[1]
            crop_curr = im[start_r:end_r, start_c:end_c]
            out_file_curr = out_file_pre + '_' + str(idx_r) + '_' + str(idx_c) + '.png'
            scipy.misc.imsave(out_file_curr, crop_curr)
            out_files.append(out_file_curr)
    return out_files
def local_pad(x):
    """Stack ``nf`` neighbouring frames of ``x`` into each output row.

    ``nf`` and ``self`` are taken from the enclosing scope.  ``x`` is a
    2-D array indexed ``[frame, feature]`` and is returned unchanged when
    ``nf <= 1``.  Otherwise the result has ``x.shape[1] * nf`` columns and
    dtype ``theano.config.floatX``:

    * without ``self._margin``: one row per frame ``j`` holding frames
      ``j - ba .. j + ba`` (``ba = (nf - 1) // 2``), zero-padded where the
      window leaves ``x``;
    * with a margin ``ma``: ``x.shape[0] - 2 * ma`` rows; when
      ``ba <= ma`` row ``j`` is frames ``j .. j + 2 * ma``, otherwise the
      zero-padded window around ``j`` is used.
    """
    # TODO replace with pad global function
    if nf <= 1:
        return x
    # Floor division keeps ``ba`` usable as a slice bound on Python 3.
    ba = (nf - 1) // 2  # before/after
    n_frames, width = x.shape[0], x.shape[1]

    def context_window(j):
        """Frames j-ba..j+ba flattened, zero-padded at the edges of x."""
        return numpy.pad(x[max(0, j - ba):j + ba + 1].flatten(),
                         (max(0, (ba - j) * width),
                          max(0, ((j + ba + 1) - n_frames) * width)),
                         'constant', constant_values=(0, 0))

    if self._margin:
        ma = self._margin
        # Diagnostics for inputs too small to yield any output.
        if n_frames - 2 * ma <= 0:
            print("shape[0]: %s" % (n_frames,))
            print("ma: %s" % (ma,))
        if width * nf <= 0:
            print("shape[1]: %s" % (width,))
            print("nf: %s" % (nf,))
        ret = numpy.zeros((n_frames - 2 * ma, width * nf),
                          dtype=theano.config.floatX)
        if ba <= ma:
            # NOTE(review): this copies 2 * ma + 1 frames into a row sized
            # for nf frames, so it only fits when 2 * ma + 1 == nf.
            for j in range(ret.shape[0]):
                ret[j] = x[j:j + 2 * ma + 1].flatten()
        else:
            for j in range(ret.shape[0]):
                ret[j] = context_window(j)
        return ret

    ret = numpy.zeros((n_frames, width * nf), dtype=theano.config.floatX)
    for j in range(n_frames):
        ret[j] = context_window(j)
    return ret
def treatArray(data):
    """Crop or pad ``data`` to the patch size according to ``border_mode``.

    ``border_mode``, ``n_dim`` and ``ps`` (target size per spatial axis)
    come from the enclosing scope.  For ``n_dim == 3`` the spatial axes
    are 0, 2 and 3 (axis 1 is left untouched); otherwise they are axes 2
    and 3.

    * 'keep': return ``data`` unchanged.
    * 'crop': cut a centred window of size ``ps`` out of the spatial axes.
    * 'mirror' / '0-pad': grow the spatial axes to ``ps`` with symmetric
      reflection or zeros; an odd difference puts the extra element in
      front.
    * any other mode: ``data`` is returned unchanged.
    """
    if border_mode == 'keep':
        return data

    if n_dim == 3:
        sh = (data.shape[0], ) + data.shape[2:]  # exclude channel (z,x,y)
    else:
        sh = data.shape[2:]  # (x,y)

    if border_mode == 'crop':
        # Lists, not ``map`` objects: the offsets are indexed below.
        excess = [int((have - want) // 2) for have, want in zip(sh, ps)]
        if n_dim == 3:
            data = data[excess[0]:excess[0] + ps[0],
                        :,
                        excess[1]:excess[1] + ps[1],
                        excess[2]:excess[2] + ps[2]]
        elif n_dim == 2:
            data = data[:,
                        :,
                        excess[0]:excess[0] + ps[0],
                        excess[1]:excess[1] + ps[1]]
    else:
        excess_l = [int(np.ceil(float(want - have) / 2))
                    for want, have in zip(ps, sh)]
        excess_r = [int(np.floor(float(want - have) / 2))
                    for want, have in zip(ps, sh)]
        if n_dim == 3:
            pad_with = [(excess_l[0], excess_r[0]),
                        (0, 0),
                        (excess_l[1], excess_r[1]),
                        (excess_l[2], excess_r[2])]
        else:
            pad_with = [(0, 0),
                        (0, 0),
                        (excess_l[0], excess_r[0]),
                        (excess_l[1], excess_r[1])]

        if border_mode == 'mirror':
            data = np.pad(data, pad_with, mode='symmetric')
        if border_mode == '0-pad':
            data = np.pad(data, pad_with, mode='constant', constant_values=0)

    return data
def test_find_center_vo_with_downsampling(self):
    """find_center_vo on 'sinogram.npy' must return about 45.28 (rtol 1.5%)."""
    sim = read_file('sinogram.npy')
    # NOTE(review): np.pad returns a new array and the result is discarded
    # here, so ``sim`` reaches find_center_vo unpadded and the large-input
    # (downsampling) case named by this test is presumably never exercised.
    # Assigning the result would also shift the expected centre by the
    # padding - confirm the intent before changing either line.
    np.pad(
        sim, ((1000, 1000), (0, 0), (1000, 1000)),
        mode="constant", constant_values=0)
    cen = find_center_vo(sim)
    assert_allclose(cen, 45.28, rtol=0.015)
def calcgrad(i):
    """Encode the ordering of four directional differences per pixel.

    For every pixel of the 2-D image ``i`` the differences to its left,
    bottom-left, bottom and bottom-right neighbours are computed
    (neighbours outside the image count as 0).  The six pairwise ``>=``
    comparisons of those differences are packed into one 6-bit code:

        32*(d1 >= d2) + 16*(d1 >= d3) + 8*(d1 >= d4)
        + 4*(d2 >= d3) + 2*(d2 >= d4) + (d3 >= d4)

    with d1..d4 in the neighbour order given above.

    Unsigned (e.g. uint8 image) and boolean inputs are converted to a
    signed integer type first; otherwise negative differences would wrap
    around and corrupt the comparisons.

    Returns
    -------
    Integer array with the same shape as ``i`` and values in 0..63.
    """
    img = np.asarray(i)
    if img.dtype.kind in 'ub':
        img = img.astype(np.int64)
    height, width = img.shape

    # One zero row below and one zero column on each side provide every
    # out-of-image neighbour; padded[r, c + 1] == img[r, c].
    padded = np.pad(img, ((0, 1), (1, 1)), 'constant')
    first = img - padded[:height, :width]             # left
    second = img - padded[1:height + 1, :width]       # bottom-left
    third = img - padded[1:height + 1, 1:width + 1]   # bottom
    fourth = img - padded[1:height + 1, 2:width + 2]  # bottom-right

    comparisons = (
        (32, first >= second),
        (16, first >= third),
        (8, first >= fourth),
        (4, second >= third),
        (2, second >= fourth),
        (1, third >= fourth),
    )
    ldgp = np.zeros((height, width), dtype=int)
    for weight, holds in comparisons:
        ldgp += weight * holds.astype(int)
    return ldgp
def __init__(self, G_list, max_num_nodes, features='id'):
    """Precompute adjacency matrices and node features for each graph.

    Parameters
    ----------
    G_list : iterable of networkx graphs
    max_num_nodes : int
        Size that the per-node feature columns are zero-padded to.
    features : {'id', 'deg', 'struct'}
        'id'     - identity matrix of size ``max_num_nodes``;
        'deg'    - node degrees as one zero-padded column;
        'struct' - degree and clustering coefficient as two zero-padded
                   columns.
        Any other value stores no features (``feature_all`` stays empty).

    For every graph the adjacency matrix with ones added on the diagonal
    goes to ``self.adj_all`` and its node count to ``self.len_all``.
    """
    self.max_num_nodes = max_num_nodes
    self.adj_all = []
    self.len_all = []
    self.feature_all = []

    for G in G_list:
        num_nodes = G.number_of_nodes()
        adj = np.asarray(nx.to_numpy_matrix(G))

        def padded_column(values):
            """Zero-pad a per-node vector to max_num_nodes, as a column."""
            padded = np.pad(values, [0, max_num_nodes - num_nodes], 'constant')
            return np.expand_dims(padded, axis=1)

        # the diagonal entries are 1 since they denote node probability
        self.adj_all.append(adj + np.identity(num_nodes))
        self.len_all.append(num_nodes)

        if features == 'id':
            self.feature_all.append(np.identity(max_num_nodes))
        elif features == 'deg':
            # Pad mode must be 'constant'; this branch used to pass 0 as
            # the mode, which np.pad rejects.
            degs = padded_column(np.sum(adj, 1))
            self.feature_all.append(degs)
        elif features == 'struct':
            degs = padded_column(np.sum(adj, 1))
            clusterings = padded_column(
                np.array(list(nx.clustering(G).values())))
            self.feature_all.append(np.hstack([degs, clusterings]))
def _fixOddKernel(kernel): """Take a kernel with odd dimensions and make them even for FFT Parameters ---------- kernel : `numpy.array` a numpy.array Returns ------- out : `numpy.array` a fixed kernel numpy.array. Returns a copy if the dimensions needed to change; otherwise just return the input kernel. """ # Note this works best for the FFT if we left-pad out = kernel changed = False if (out.shape[0] % 2) == 1: out = np.pad(out, ((1, 0), (0, 0)), mode='constant') changed = True if (out.shape[1] % 2) == 1: out = np.pad(out, ((0, 0), (1, 0)), mode='constant') changed = True if changed: out *= (np.mean(kernel) / np.mean(out)) # need to re-scale to same mean for FFT return out
def cross_correlation(x, y, maxlag):
    """
    Cross correlation with a maximum number of lags.

    `x` and `y` must be one-dimensional numpy arrays with the same length.

    This computes the same result as
        numpy.correlate(x, y, mode='full')[len(x)-maxlag-1:len(x)+maxlag]

    The return value has length 2*maxlag + 1.

    Raises ValueError if `x` or `y` is not one-dimensional.

    Author: http://stackoverflow.com/questions/30677241
            Warren Weckesser
    """
    from numpy.lib.stride_tricks import as_strided

    def _require_1d(values, label):
        arr = np.asarray(values)
        if arr.ndim != 1:
            raise ValueError('%s must be one-dimensional.' % label)
        return arr

    x = _require_1d(x, 'x')
    y = _require_1d(y, 'y')

    span = 2 * maxlag
    padded_y = np.pad(y.conj(), span, mode='constant')
    step = padded_y.strides[0]
    # Row r of this view is the padded y starting r samples earlier (the
    # negative row stride walks backwards); no data is copied.
    lagged = as_strided(padded_y[span:],
                        shape=(span + 1, len(y) + span),
                        strides=(-step, step))
    padded_x = np.pad(x, maxlag, mode='constant')
    return lagged.dot(padded_x)
def __init__(self,h5_path,image_paths,max_q=None,max_mc=None):
    """Load the question/answer arrays from an HDF5 file.

    Parameters
    ----------
    h5_path : str
        HDF5 file providing the datasets 'image_ids', 'questions',
        'multiple_choices' and 'ground_truth'.  The file is opened
        read-only and kept open as ``self.h5``.
    image_paths
        Stored unchanged as ``self.image_paths``.
    max_q : int or None
        When truthy, questions are truncated, or right-padded with
        ``w2i['</s>']``, along their second axis to this length.
    max_mc : int or None
        When truthy, the multiple-choice array is truncated or padded the
        same way along its last axis.
    """
    self.h5 = h5py.File(h5_path, mode='r')
    # ``dataset[()]`` reads the whole dataset; the older ``.value`` alias
    # is no longer available in h5py 3.
    self.image_ids = self.h5['image_ids'][()]
    self.questions = self.h5['questions'][()]
    self.multiple_choice = self.h5['multiple_choices'][()]
    self.answers = self.h5['ground_truth'][()]
    self.N = len(self.image_ids)

    if max_q:
        if max_q < self.questions.shape[1]:
            self.questions = self.questions[:, :max_q]
        else:
            self.questions = np.pad(
                self.questions,
                ((0, 0), (0, max_q - self.questions.shape[-1])),
                'constant', constant_values=w2i['</s>'])

    if max_mc:
        if max_mc < self.multiple_choice.shape[-1]:
            # Truncate the last axis.  Indexing with a bare ``max_mc``
            # picked a single position and dropped the axis instead.
            self.multiple_choice = self.multiple_choice[:, :, :max_mc]
        else:
            self.multiple_choice = np.pad(
                self.multiple_choice,
                ((0, 0), (0, 0), (0, max_mc - self.multiple_choice.shape[-1])),
                'constant', constant_values=w2i['</s>'])

    # NOTE(review): max_mc is read from axis 1 although truncation and
    # padding above act on the last axis - confirm which is meant.
    self.max_mc = self.multiple_choice.shape[1]
    self.max_q = self.questions.shape[1]
    self.indexes = np.arange(self.N)
    self.image_paths = image_paths
if st.split('_')[0] == 'bk': bk = get_bank_size(int(st.split('_')[1])) st_id_list.append(bk[0]) st_list.append(bk) else: break if len(st_list) == 1: continue real_data_id.append(st_id_list) real_data.append(st_list) start_id_list.append(st_id_list[0]) end_id_list.append(st_id_list[-1]) real_data_id0 = real_data_id.copy() real_data_id = [x[:g_sequence_len] for x in real_data_id] real_data_id1 = [np.pad(x, (0,g_sequence_len - len(x))) for x in real_data_id] endtime = time.time(); dtime = endtime - starttime print("\nTime for loading real world data:%.8s s" % dtime) GENERATED_NUM = len(real_data_id1) print('\nGENERATED_NUM,real_data_id1', GENERATED_NUM) VOCAB_SIZE = len(x_info_ids)+1+10 # padding print('\nVOCAB_SIZE:',VOCAB_SIZE) print('real_vocab_size: ', len(x_info_ids)) starttime = time.time() x_index = [] for i in range(len(x_ids)): if x_ids[i] not in x_info_ids: x_index.append(i)
def _pad(seq, max_len, constant_values=0): return np.pad(seq, (0, max_len - len(seq)), mode='constant', constant_values=constant_values)
_, model_path, img_in, img_out = sys.argv[0:4] # For lib_maxout_theano_batch we can control batch size # batch_size = 1024 # if len(sys.argv) > 4: # batch_size = int(sys.argv[4]) # network = DeepNetwork(model_path, batch_size=batch_size) network = DeepNetwork(model_path) input_image = normalize_image_float(np.array(Image.open(img_in))) nx, ny = input_image.shape pad_by = network.pad_by pad_image = np.pad(input_image, ((pad_by, pad_by), (pad_by, pad_by)), 'symmetric') start_time = time.time() output = network.apply_net(pad_image, perform_pad=False) print 'Complete in {0:1.4f} seconds'.format(time.time() - start_time) im = Image.fromarray(np.uint8(output * 255)) im.save(img_out) print "Image saved." import h5py f = h5py.File(img_out.replace('.tif', '') + '.h5') f['/probabilities'] = output
def __getitem__(self, idx):
    """Load and collate the batch of line images listed in ``self.lineIndex[idx]``.

    Each entry of ``self.lineIndex`` is ``(author, line_indices)``. Every line
    image is read in grayscale, cropped/padded vertically by its stored
    ``pad_above``/``pad_below``, padded 10 px left and right, resized toward
    ``self.img_height`` (width capped through ``self.max_width``), optionally
    augmented, and mapped to float32 via ``1.0 - img / 128.0``.

    Returns:
        ``None`` if any image cannot be read or any transcription is empty.
        Otherwise a dict with keys "image", "mask", "top_and_bottom",
        "center_line", "label", "style", "label_lengths", "gt",
        "spaced_label", "name", "author", plus "fg_mask" when
        ``self.fg_masks_dir`` is set.
    """
    inst = self.lineIndex[idx]
    author = inst[0]
    lines = inst[1]
    batch = []
    for line in lines:
        # Out-of-range line indices are remapped into the author's line list.
        if line >= len(self.authors[author]):
            line = (line + 37) % len(self.authors[author])
        img_path, gt, pad_above, pad_below = self.authors[author][line]
        img = cv2.imread(img_path, 0)  #read as grayscale
        if img is None:
            return None

        # Negative padding means "crop that many rows" instead of padding.
        if pad_above < 0:
            img = img[-pad_above:, :]
            pad_above = 0
        if pad_below < 0:
            img = img[:pad_below, :]
            pad_below = 0
        #if pad_above>0 or pad_below>0:
        img = img = np.pad(img, ((pad_above, pad_below), (10, 10)), 'constant', constant_values=255)  #we also pad a bit on the sides
        #print('{}, {} {}'.format(img_path,pad_above,pad_below))

        if img.shape[0] != self.img_height:
            if img.shape[0] < self.img_height and not self.warning:
                self.warning = True
                print("WARNING: upsampling image to fit size")
            percent = float(self.img_height) / img.shape[0]
            # If scaling to the target height would exceed max_width, scale
            # to max_width instead (the image then ends up shorter).
            if img.shape[1] * percent > self.max_width:
                percent = self.max_width / img.shape[1]
            img = cv2.resize(img, (0, 0), fx=percent, fy=percent, interpolation=cv2.INTER_CUBIC)
            # Fill the remaining height with white rows, split top/bottom.
            if img.shape[0] < self.img_height:
                diff = self.img_height - img.shape[0]
                img = np.pad(img, ((diff // 2, diff // 2 + diff % 2), (0, 0)), 'constant', constant_values=255)

        # Add a trailing channel axis to 2-D images.
        if len(img.shape) == 2:
            img = img[..., None]

        if self.fg_masks_dir is not None:
            fg_path = os.path.join(self.fg_masks_dir, '{}_{}.png'.format(author, line))
            fg_mask = cv2.imread(fg_path, 0)
            fg_mask = fg_mask / 255
            if fg_mask.shape != img[:, :, 0].shape:
                # Stored mask does not match the image: report it and
                # recompute the mask from the image (Otsu threshold, invert,
                # dilate with a 9x9 ellipse).
                print(
                    'Error, fg_mask ({}, {}) not the same size as image ({})'
                    .format(fg_path, fg_mask.shape, img[:, :, 0].shape))
                th, fg_mask = cv2.threshold(
                    img, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
                fg_mask = 255 - fg_mask
                ele = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (9, 9))
                fg_mask = cv2.dilate(fg_mask, ele)
                fg_mask = fg_mask / 255

        if self.augmentation is not None:
            #img = augmentation.apply_random_color_rotation(img)
            img = augmentation.apply_tensmeyer_brightness(img)
            img = grid_distortion.warp_image(img)
            # Restore the channel axis if the augmentation returned a 2-D array.
            if len(img.shape) == 2:
                img = img[..., None]

        img = img.astype(np.float32)
        img = 1.0 - img / 128.0

        if len(gt) == 0:
            return None
        gt_label = string_utils.str2label_single(gt, self.char_to_idx)

        if self.styles:
            # NOTE(review): no local named `id` is assigned in this method, so
            # `id` here resolves to the builtin function -- confirm which key
            # was intended.
            style_i = self.npr.choice(len(self.styles[author][id]))
            style = self.styles[author][id][style_i]
        else:
            style = None
        # File name without directory and extension.
        name = img_path[img_path.rfind('/') + 1:img_path.rfind('.')]
        spaced_label = None if self.spaced_by_name is None else self.spaced_by_name[
            img_path]
        if spaced_label is not None:
            assert (spaced_label.shape[1] == 1)
        toAppend = {
            "image": img,
            "gt": gt,
            "style": style,
            "gt_label": gt_label,
            "spaced_label": spaced_label,
            "name": name,
            "center": self.center,
            "author": author
        }
        if self.fg_masks_dir is not None:
            toAppend['fg_mask'] = fg_mask
        batch.append(toAppend)

    #batch = [b for b in batch if b is not None]
    #These all should be the same size or error
    assert len(set([b['image'].shape[0] for b in batch])) == 1
    assert len(set([b['image'].shape[2] for b in batch])) == 1

    dim0 = batch[0]['image'].shape[0]
    dim1 = max([b['image'].shape[1] for b in batch])  # widest image in the batch
    dim2 = batch[0]['image'].shape[2]

    all_labels = []
    label_lengths = []
    if self.spaced_by_name is not None:
        spaced_labels = []
    else:
        spaced_labels = None
    max_spaced_len = 0

    # Batch canvas pre-filled with PADDING_CONSTANT; each image is copied in
    # below, left-aligned unless centering is enabled.
    input_batch = np.full((len(batch), dim0, dim1, dim2), PADDING_CONSTANT).astype(np.float32)
    if self.fg_masks_dir is not None:
        fg_masks = np.full((len(batch), dim0, dim1, 1), 0).astype(np.float32)
    for i in range(len(batch)):
        b_img = batch[i]['image']
        toPad = (dim1 - b_img.shape[1])
        if 'center' in batch[0] and batch[0]['center']:
            toPad //= 2
        else:
            toPad = 0
        input_batch[i, :, toPad:toPad + b_img.shape[1], :] = b_img
        if self.fg_masks_dir is not None:
            fg_masks[i, :, toPad:toPad + b_img.shape[1], 0] = batch[i]['fg_mask']

        l = batch[i]['gt_label']
        all_labels.append(l)
        label_lengths.append(len(l))

        if spaced_labels is not None:
            sl = batch[i]['spaced_label']
            spaced_labels.append(sl)
            max_spaced_len = max(max_spaced_len, sl.shape[0])

    #all_labels = np.concatenate(all_labels)
    label_lengths = torch.IntTensor(label_lengths)

    # Zero-pad every label to the longest one and stack along axis 1,
    # giving an array of shape (max_len, batch).
    max_len = label_lengths.max()
    all_labels = [
        np.pad(l, ((0, max_len - l.shape[0]), ), 'constant')
        for l in all_labels
    ]
    all_labels = np.stack(all_labels, axis=1)
    if self.spaced_by_name is not None:
        spaced_labels = [
            np.pad(l, ((0, max_spaced_len - l.shape[0]), (0, 0)), 'constant')
            for l in spaced_labels
        ]
        ddd = spaced_labels  # not used again in this method
        spaced_labels = np.concatenate(spaced_labels, axis=1)
        spaced_labels = torch.from_numpy(spaced_labels)
        assert (spaced_labels.size(1) == len(batch))

    # (batch, H, W, C) -> (batch, C, H, W)
    images = input_batch.transpose([0, 3, 1, 2])
    images = torch.from_numpy(images)
    labels = torch.from_numpy(all_labels.astype(np.int32))
    #label_lengths = torch.from_numpy(label_lengths.astype(np.int32))
    if self.fg_masks_dir is not None:
        fg_masks = fg_masks.transpose([0, 3, 1, 2])
        fg_masks = torch.from_numpy(fg_masks)

    if batch[0]['style'] is not None:
        styles = np.stack([b['style'] for b in batch], axis=0)
        styles = torch.from_numpy(styles).float()
    else:
        styles = None
    mask, top_and_bottom, center_line = makeMask(images, self.mask_post, self.mask_random)
    ##DEBUG
    #for i in range(5):
    #    mask2, top_and_bottom2 = makeMask(images,self.mask_post, self.mask_random)
    #    #extra_masks.append(mask2)
    #    mask2 = ((mask2[0,0]+1)/2).numpy().astype(np.uint8)*255
    #    cv2.imshow('mask{}'.format(i),mask2)
    #mask = ((mask[0,0]+1)/2).numpy().astype(np.uint8)*255
    #cv2.imshow('mask'.format(i),mask)
    #cv2.waitKey()
    toRet = {
        "image": images,
        "mask": mask,
        "top_and_bottom": top_and_bottom,
        "center_line": center_line,
        "label": labels,
        "style": styles,
        "label_lengths": label_lengths,
        "gt": [b['gt'] for b in batch],
        "spaced_label": spaced_labels,
        "name": [b['name'] for b in batch],
        "author": [b['author'] for b in batch],
    }
    if self.fg_masks_dir is not None:
        toRet['fg_mask'] = fg_masks
    return toRet
def __init__(self, dirPath, split, config):
    """Index line images by author and prepare per-author batches.

    Args:
        dirPath: dataset root; ground-truth text files are read from (and a
            derived ``*_pageNorm.txt`` file is written to) this directory.
        split: split name used in the ground-truth file names; overridden by
            ``config['split']`` when present.
        config: dict of options. Required keys read here: 'img_height',
            'a_batch_size', 'char_file'. Optional keys: 'split', 'max_width'
            (default 1300), 'skip_pad', 'short', 'fg_masks_dir',
            'augmentation', 'overfit', 'style_loc', 'spaced_loc',
            'mask_post', 'mask_random'.

    Side effects: may create the page-normalized ground-truth file and
    foreground-mask PNGs on disk.
    """
    if 'split' in config:
        split = config['split']
    if split == 'test':
        subdir = 'testdataset_ICDAR'
    else:
        subdir = 'training_WR'

    self.img_height = config['img_height']
    self.batch_size = config['a_batch_size']
    self.max_width = config['max_width'] if 'max_width' in config else 1300
    skip_pad = config['skip_pad'] if 'skip_pad' in config else False
    #assert(config['batch_size']==1)

    #with open(os.path.join(dirPath,'sets.json')) as f:
    words_file = os.path.join(
        dirPath, 'groundtruth_{}2009_pageNorm.txt'.format(split))
    if not os.path.exists(words_file):
        #create modified GT file with appropriate padding anotation
        #the padding is so images by the same author have the same height (after normalization)
        authors = defaultdict(list)
        with open(
                os.path.join(dirPath,
                             'groundtruth_{}2009.txt'.format(split))) as f:
            word_list = f.readlines()
        for line in word_list:
            # Groups: 1 = relative image path, 2 = author directory, 3 = text.
            m = re.match('(lot_.+\/([^/]+)\/[^/]+.tiff) (.+)', line.strip())
            path = os.path.join(dirPath, subdir, m[1])
            author = m[2]
            gt = m[3]
            authors[author].append((m[1], gt))
        new_lines = []
        for author, lines in authors.items():
            above = []
            below = []
            n_lines = []  #to remove non-existent images
            #we measure the number of pixels above and below the centerline in each image
            for i, (path, gt) in enumerate(lines):
                img_path = os.path.join(dirPath, subdir, path)
                img = cv2.imread(img_path, 0)  #read as grayscale
                if img is None:
                    continue
                img = 255 - img
                img = cv2.blur(
                    img, (21, 21),
                    borderType=cv2.BORDER_CONSTANT)  #borderValue=0)
                #th,binarized = cv2.threshold(img,0,1,cv2.THRESH_BINARY+cv2.THRESH_OTSU)
                #binarized = 1-binarized
                # Row-wise ink profile of the inverted, blurred image.
                img = img.sum(axis=1)
                centerline = np.argmax(
                    img)  #assume center is where most ink is (after blur)
                assert (centerline > img.shape[0] * 0.1)
                assert (centerline < img.shape[0] * 0.9)
                above.append(centerline)
                below.append(img.shape[0] - centerline)
                n_lines.append((path, gt))
            above_mean = np.mean(above)
            above_std = np.std(above)
            above_max = np.max(above)
            below_mean = np.mean(below)
            below_std = np.std(below)
            below_max = np.max(below)

            #we want to pad/crop to include most ascenders and descenders, but dont need all
            goal_above = int(min(above_max, above_mean + above_std * 2))
            goal_below = int(min(below_max, below_mean + below_std * 2))

            # Positive values mean rows to add, negative values rows to crop.
            for i, (path, gt) in enumerate(n_lines):
                pad_above = goal_above - above[i]
                pad_below = goal_below - below[i]
                new_lines.append((path, gt, pad_above, pad_below))
        with open(words_file, 'w') as f:
            for path, gt, above, below in new_lines:
                f.write('{} {} {} {}\n'.format(path, gt, above, below))

    with open(words_file) as f:
        word_list = f.readlines()
    # author -> list of (absolute image path, text, pad_above, pad_below)
    self.authors = defaultdict(list)
    # list of (author, [line indices]) batches
    self.lineIndex = []
    self.max_char_len = 0
    for line in word_list:
        # NOTE(review): this pattern only matches a transcription without
        # spaces ('[^ ]+'), while the file written above stores the text
        # verbatim; a non-matching line would make m None -- confirm.
        m = re.match(
            '(lot_.+\/([^/]+)\/[^/]+.tiff) ([^ ]+) (-?\d+) (-?\d+)',
            line.strip())
        path = os.path.join(dirPath, subdir, m[1])
        author = m[2]
        gt = m[3]
        self.max_char_len = max(self.max_char_len, len(gt))
        if skip_pad:
            pad_above = 0
            pad_below = 0
        else:
            pad_above = int(m[4])
            pad_below = int(m[5])
        self.authors[author].append((path, gt, pad_above, pad_below))
        #self.lineIndex += [(author,i+authorLines) for i in range(len(lines))]
    #minLines=99999
    #for author,lines in self.authors.items():
    #print('{} {}'.format(author,len(lines)))
    #minLines = min(minLines,len(lines))
    #maxCombs = int(nCr(minLines,self.batch_size)*1.2)
    short = config['short'] if 'short' in config else False
    for author, lines in self.authors.items():
        #if split=='train':
        #    combs=list(itertools.combinations(list(range(len(lines))),self.batch_size))
        #    np.random.shuffle(combs)
        #    self.lineIndex += [(author,c) for c in combs[:maxCombs]]
        #else:
        # Consecutive full batches of this author's lines.
        for i in range(len(lines) // self.batch_size):
            ls = []
            for n in range(self.batch_size):
                ls.append(self.batch_size * i + n)
            inst = (author, ls)
            self.lineIndex.append(inst)
            if short and i >= short:
                break
        # NOTE(review): `i` is whatever the loop above (or an earlier loop)
        # left behind; when the range above is empty it is not reassigned here.
        if short and i >= short:
            continue
        # Final batch: the leftover lines (taken from the end of the list)
        # topped up with lines from the start of the list.
        leftover = len(lines) % self.batch_size
        fill = self.batch_size - leftover
        last = []
        for i in range(fill):
            last.append(i % len(lines))
        for i in range(leftover):
            last.append(len(lines) - (1 + i))
        self.lineIndex.append((author, last))

    self.fg_masks_dir = config[
        'fg_masks_dir'] if 'fg_masks_dir' in config else None
    self.warning = False

    if self.fg_masks_dir is not None:
        if self.fg_masks_dir[-1] == '/':
            self.fg_masks_dir = self.fg_masks_dir[:-1]
        # The mask directory name is suffixed with max_width.
        self.fg_masks_dir += '_{}'.format(self.max_width)
        ensure_dir(self.fg_masks_dir)
        # Create any missing foreground mask by applying the crop/pad/resize
        # steps below to the line image and binarizing the result.
        for author, lines in self.lineIndex:
            for line in lines:
                img_path, gt, pad_above, pad_below = self.authors[author][
                    line]
                fg_path = os.path.join(self.fg_masks_dir,
                                       '{}_{}.png'.format(author, line))
                if not os.path.exists(fg_path):
                    img = cv2.imread(img_path, 0)  #read as grayscale
                    if img is None:
                        continue

                    if pad_above < 0:
                        img = img[-pad_above:, :]
                        pad_above = 0
                    if pad_below < 0:
                        img = img[:pad_below, :]
                        pad_below = 0
                    #if pad_above>0 or pad_below>0:
                    img = img = np.pad(img,
                                       ((pad_above, pad_below), (10, 10)),
                                       'constant',
                                       constant_values=255)

                    if img.shape[0] != self.img_height:
                        if img.shape[
                                0] < self.img_height and not self.warning:
                            self.warning = True
                            print("WARNING: upsampling image to fit size")
                        percent = float(self.img_height) / img.shape[0]
                        if img.shape[1] * percent > self.max_width:
                            percent = self.max_width / img.shape[1]
                        img = cv2.resize(img, (0, 0),
                                         fx=percent,
                                         fy=percent,
                                         interpolation=cv2.INTER_CUBIC)
                        if img.shape[0] < self.img_height:
                            diff = self.img_height - img.shape[0]
                            img = np.pad(
                                img,
                                ((diff // 2, diff // 2 + diff % 2), (0, 0)),
                                'constant',
                                constant_values=255)

                    # Otsu threshold, invert, then dilate with a 9x9 ellipse.
                    th, binarized = cv2.threshold(
                        img, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
                    binarized = 255 - binarized
                    ele = cv2.getStructuringElement(
                        cv2.MORPH_ELLIPSE, (9, 9))
                    binarized = cv2.dilate(binarized, ele)
                    cv2.imwrite(fg_path, binarized)
                    print('saved fg mask: {}'.format(fg_path))

    char_set_path = config['char_file']
    with open(char_set_path) as f:
        char_set = json.load(f)
    self.char_to_idx = char_set['char_to_idx']
    self.augmentation = config[
        'augmentation'] if 'augmentation' in config else None
    #DEBUG
    if 'overfit' in config and config['overfit']:
        self.lineIndex = self.lineIndex[:10]

    self.center = False  #config['center_pad'] #if 'center_pad' in config else True

    if 'style_loc' in config:
        by_author_styles = defaultdict(list)
        by_author_all_ids = defaultdict(set)
        style_loc = config['style_loc']
        if style_loc[-1] != '*':
            style_loc += '*'
        all_style_files = glob(style_loc)
        assert (len(all_style_files) > 0)
        for loc in all_style_files:
            #print('loading '+loc)
            # NOTE(review): pickle.load executes arbitrary code from the
            # file; only use with trusted style files.
            with open(loc, 'rb') as f:
                styles = pickle.load(f)
            for i in range(len(styles['authors'])):
                by_author_styles[styles['authors'][i]].append(
                    (styles['styles'][i], styles['ids'][i]))
                by_author_all_ids[styles['authors'][i]].update(
                    styles['ids'][i])

        # styles[author][id] = every style of that author whose id list does
        # not contain `id`.
        self.styles = defaultdict(lambda: defaultdict(list))
        for author in by_author_styles:
            for id in by_author_all_ids[author]:
                for style, ids in by_author_styles[author]:
                    if id not in ids:
                        self.styles[author][id].append(style)

        # NOTE(review): self.styles is a defaultdict; `in` does not insert
        # keys, so this check is meaningful -- but later indexing will.
        for author in self.authors:
            assert (author in self.styles)
    else:
        self.styles = None

    if 'spaced_loc' in config:
        with open(config['spaced_loc'], 'rb') as f:
            self.spaced_by_name = pickle.load(f)
        #for name,v in spaced_by_name.items():
        #    author, id = name.split('_')
    else:
        self.spaced_by_name = None

    self.mask_post = config['mask_post'] if 'mask_post' in config else []
    self.mask_random = config[
        'mask_random'] if 'mask_random' in config else False
def __getitem__(self, index):
    """Return ``(img_path, input_img, filled_labels)`` for one sample.

    The image is padded to a square with the value 128, scaled by 1/255,
    resized to ``self.img_shape`` and returned channels-first as a float
    tensor. Label rows (five columns each; columns 1-4 are a normalized box
    centre and size) are re-expressed relative to the padded image and
    written into a fixed ``(self.max_objects, 5)`` tensor.
    """
    n_files = len(self.img_files)

    # ---- image ----
    # Step forward until a file decodes to a three-dimensional array.
    while True:
        img_path = self.img_files[index % n_files].rstrip()
        img = np.array(Image.open(img_path))
        if len(img.shape) == 3:
            break
        index += 1

    h, w, _ = img.shape
    dim_diff = np.abs(h - w)
    # Split the missing rows/columns between the two sides.
    pad1 = dim_diff // 2
    pad2 = dim_diff - dim_diff // 2
    if h <= w:
        pad = ((pad1, pad2), (0, 0), (0, 0))
    else:
        pad = ((0, 0), (pad1, pad2), (0, 0))

    input_img = np.pad(img, pad, 'constant', constant_values=128) / 255.
    padded_h, padded_w, _ = input_img.shape
    input_img = resize(input_img, (*self.img_shape, 3), mode='reflect')
    # Channels-first, then to a float tensor.
    input_img = torch.from_numpy(np.transpose(input_img, (2, 0, 1))).float()

    # ---- labels ----
    label_path = self.label_files[index % n_files].rstrip()

    labels = None
    if os.path.exists(label_path):
        labels = np.loadtxt(label_path).reshape(-1, 5)
        half_w = labels[:, 3] / 2
        half_h = labels[:, 4] / 2
        # Box corners in pixels of the original image, shifted by the
        # leading padding on each axis.
        left = w * (labels[:, 1] - half_w) + pad[1][0]
        top = h * (labels[:, 2] - half_h) + pad[0][0]
        right = w * (labels[:, 1] + half_w) + pad[1][0]
        bottom = h * (labels[:, 2] + half_h) + pad[0][0]
        # Back to ratios, now relative to the padded image.
        labels[:, 1] = ((left + right) / 2) / padded_w
        labels[:, 2] = ((top + bottom) / 2) / padded_h
        labels[:, 3] *= w / padded_w
        labels[:, 4] *= h / padded_h

    filled_labels = np.zeros((self.max_objects, 5))
    if labels is not None:
        count = min(len(labels), self.max_objects)
        filled_labels[:count] = labels[:count]
    filled_labels = torch.from_numpy(filled_labels)

    return img_path, input_img, filled_labels
def bilateralfilter(image, texture, sigma_s, sigma_r):
    """Filter `image` with bilateral weights computed from `texture`.

    Every output pixel is a weighted mean of the image over a
    (2r+1) x (2r+1) window, r = ceil(3 * sigma_s). The weight of a neighbour
    is a spatial Gaussian (sigma_s) times a range Gaussian (sigma_r) of the
    absolute texture difference to the window centre; for a 3-D texture the
    range terms of channels 0, 1 and 2 are multiplied.

    Args:
        image: 2-D array or 3-D (H, W, C) array. For 3-D input, channels
            0, 1 and 2 are filtered.
        texture: 2-D or 3-D guidance array with the same height and width as
            `image`. It is cast to int32 and absolute differences index a
            256-entry lookup table, so values must stay within a range of 256.
        sigma_s: spatial standard deviation in pixels.
        sigma_r: range standard deviation in texture units.

    Returns:
        Array with the shape and dtype of `image`. If either input is
        invalid or the two are misaligned, a message is printed and `image`
        itself is returned unchanged.
    """
    r = int(np.ceil(3 * sigma_s))

    # Symmetric (mirror) padding by r so the window is defined on borders.
    if image.ndim == 3:
        h, w = image.shape[:2]
        I = np.pad(image, ((r, r), (r, r), (0, 0)), 'symmetric').astype(np.float32)
    elif image.ndim == 2:
        h, w = image.shape
        I = np.pad(image, ((r, r), (r, r)), 'symmetric').astype(np.float32)
    else:
        print('Input image is not valid!')
        return image

    # Reject textures of unsupported rank up front; previously this fell
    # through and failed later with a NameError on the undefined `T`.
    if texture.ndim not in (2, 3):
        print('Input texture is not valid!')
        return image
    if texture.shape[0] != h or texture.shape[1] != w:
        print('The guidance image is not aligned with input image!')
        return image
    if texture.ndim == 3:
        T = np.pad(texture, ((r, r), (r, r), (0, 0)), 'symmetric').astype(np.int32)
    else:
        T = np.pad(texture, ((r, r), (r, r)), 'symmetric').astype(np.int32)

    output = np.zeros_like(image)

    # exp(-d^2 / (2 sigma^2)) factors
    scaleFactor_s = 1 / (2 * sigma_s * sigma_s)
    scaleFactor_r = 1 / (2 * sigma_r * sigma_r)

    # Lookup table for the range kernel, indexed by absolute difference.
    LUT = np.exp(-np.arange(256) * np.arange(256) * scaleFactor_r)

    # Spatial Gaussian on a symmetric grid -r..r.
    gx, gy = np.meshgrid(np.arange(2 * r + 1) - r, np.arange(2 * r + 1) - r)
    kernel_s = np.exp(-(gx * gx + gy * gy) * scaleFactor_s)

    for y in range(r, r + h):
        rows = slice(y - r, y + r + 1)
        for x in range(r, r + w):
            cols = slice(x - r, x + r + 1)

            # Range weights from the texture, times the spatial kernel.
            if T.ndim == 2:
                wgt = LUT[np.abs(T[rows, cols] - T[y, x])] * kernel_s
            else:
                wgt = LUT[abs(T[rows, cols, 0] - T[y, x, 0])] * \
                      LUT[abs(T[rows, cols, 1] - T[y, x, 1])] * \
                      LUT[abs(T[rows, cols, 2] - T[y, x, 2])] * \
                      kernel_s
            wacc = np.sum(wgt)

            # Normalized weighted sum: sum(w * I) / sum(w).
            if I.ndim == 2:
                output[y - r, x - r] = np.sum(wgt * I[rows, cols]) / wacc
            else:
                for c in range(3):
                    output[y - r, x - r, c] = np.sum(wgt * I[rows, cols, c]) / wacc

    # return np.clip(output, 0, 255)
    return output
#convert to numpy array x1 = np.array(samples).astype("complex64") # mix down with fc fc1 = np.exp(-1.0j * 2.0 * np.pi * F_offset / Fs * np.arange(len(x1))) x2 = x1 * fc1 f_bw = 200000 Inp1 = x2.real * np.cos(2.0 * np.pi * Fc * np.arange(len(x2))) Qa1 = -1 * x2.imag * np.sin(2.0 * np.pi * Fc * np.arange(len(x2))) Inp2 = np.convolve(Inp1, h) Qa2 = np.convolve(Qa1, h) delta_T = 155 Inp2_delay = np.pad(Inp2, (155, 0), 'constant', constant_values=(0, 0)) Qa2_delay = np.pad(Qa2, (155, 0), 'constant', constant_values=(0, 0)) Inp3 = np.pad(Inp2, (0, 155), 'constant', constant_values=(0, 0)) Qa3 = np.pad(Qa2, (0, 155), 'constant', constant_values=(0, 0)) D = Qa3 * Inp2_delay - Inp3 * Qa2_delay y = [] d = [] k = 0 for x in D: k += 1 if x > 0: y.append(1) else: y.append(0) if k == 30: k = 0
def pad(img, padding, fill=0, padding_mode='constant'):
    """Pad the given PIL Image on all sides with specified padding mode and fill value.

    Args:
        img (PIL Image): Image to be padded.
        padding (int or tuple): Padding on each border. If a single int is provided this
            is used to pad all borders. If tuple of length 2 is provided this is the padding
            on left/right and top/bottom respectively. If a tuple of length 4 is provided
            this is the padding for the left, top, right and bottom borders
            respectively.
        fill: Pixel fill value for constant fill. Default is 0. If a tuple of
            length 3, it is used to fill R, G, B channels respectively.
            This value is only used when the padding_mode is constant or
            square_constant.
        padding_mode: Type of padding. Should be: constant, edge, reflect, symmetric
            or square_constant. Default is constant.
            constant: pads with a constant value, this value is specified with fill
            edge: pads with the last value on the edge of the image
            reflect: pads with reflection of image (without repeating the last value on the edge)
                padding [1, 2, 3, 4] with 2 elements on both sides in reflect mode
                will result in [3, 2, 1, 2, 3, 4, 3, 2]
            symmetric: pads with reflection of image (repeating the last value on the edge)
                padding [1, 2, 3, 4] with 2 elements on both sides in symmetric mode
                will result in [2, 1, 1, 2, 3, 4, 4, 3]
            square_constant: overrides padding to pad smallest edge with constant
                value, specified with fill, to make a square image. Each side of the
                smaller dimension receives half the size difference, rounded down.

    Returns:
        PIL Image: Padded image.

    Raises:
        TypeError: if an argument has an inappropriate type.
        ValueError: if `padding` is a sequence whose length is not 2 or 4.
    """
    # The ABC aliases on the top-level ``collections`` module were removed in
    # Python 3.10; import the class from ``collections.abc`` instead.
    from collections.abc import Sequence

    if not _is_pil_image(img):
        raise TypeError('img should be PIL Image. Got {}'.format(type(img)))

    if not isinstance(padding, (numbers.Number, tuple)):
        raise TypeError('Got inappropriate padding arg')
    if not isinstance(fill, (numbers.Number, str, tuple)):
        raise TypeError('Got inappropriate fill arg')
    if not isinstance(padding_mode, str):
        raise TypeError('Got inappropriate padding_mode arg')

    if isinstance(padding, Sequence) and len(padding) not in [2, 4]:
        raise ValueError("Padding must be an int or a 2, or 4 element tuple, not a " +
                         "{} element tuple".format(len(padding)))

    assert padding_mode in ['constant', 'edge', 'reflect', 'symmetric', 'square_constant'], \
        'Padding mode should be either constant, square_constant, edge, reflect or symmetric'

    if padding_mode == 'constant':
        return ImageOps.expand(img, border=padding, fill=fill)
    elif padding_mode == 'square_constant':
        if len(img.size) > 3 or len(img.size) < 2:
            raise ValueError("padding_mode == 'square_constant' valid only for 2D or 3D images")
        im_size = img.size if len(img.size) == 2 else img.size[:-1]
        # img.size is (width, height); the smaller of the two gets the border.
        pad_amt, pad_dim = (np.max(im_size) - np.min(im_size))//2, np.argmin(im_size)
        padding = np.zeros((2,), dtype=int)
        padding[pad_dim] = pad_amt
        # Hand plain Python ints (not numpy integers) to PIL.
        return ImageOps.expand(img, border=tuple(int(p) for p in padding), fill=fill)
    else:
        if isinstance(padding, int):
            pad_left = pad_right = pad_top = pad_bottom = padding
        if isinstance(padding, Sequence) and len(padding) == 2:
            pad_left = pad_right = padding[0]
            pad_top = pad_bottom = padding[1]
        if isinstance(padding, Sequence) and len(padding) == 4:
            pad_left = padding[0]
            pad_top = padding[1]
            pad_right = padding[2]
            pad_bottom = padding[3]

        img = np.asarray(img)
        # RGB image
        if len(img.shape) == 3:
            img = np.pad(img, ((pad_top, pad_bottom), (pad_left, pad_right), (0, 0)), padding_mode)
        # Grayscale image
        if len(img.shape) == 2:
            img = np.pad(img, ((pad_top, pad_bottom), (pad_left, pad_right)), padding_mode)

        return Image.fromarray(img)
def apply(self, data):
    """Build a 1-D feature vector from one multi-channel segment.

    `data` is transposed on entry, so after that step rows are samples and
    columns are channels (presumably the caller passes (channels, samples)
    -- TODO confirm). The segment is zero-padded at the end to
    ``self.splitsize`` samples. Which feature groups are appended is
    controlled by the ``self.with_*`` / ``self.onlyfd_dfa`` flags.

    Returns:
        The concatenation (axis 0) of all collected feature arrays.
    """
    lvl = np.array([0.1, 4, 8, 12, 30, 70, 180])  # Frequency levels in Hz
    data = data.transpose()
    data = np.pad(data,((0,self.splitsize -data.shape[0]),(0,0)),'constant')
    nt, nc =data.shape
    fs = 400  # sampling rate assumed before optional resampling
    feat = []

    if self.smooth:
        # Resample to smooth_Hz and drop the top band edge.
        data=resample(data,int(nt/fs*self.smooth_Hz))
        nt, nc = data.shape
        fs = self.smooth_Hz
        lvl = np.array([0.1, 4, 8, 12, 30, 70])

    if self.with_six or self.with_dy:
        # Magnitude spectrum per channel, normalized to sum to one.
        D = np.absolute(np.fft.rfft(data, axis=0))
        D[0, :] = 0  # set the DC component to zero
        for i in range(nc):
            D[:,i] /= D[:,i].sum()  # Normalize each channel

        # Eigenvalue features of the channel-by-channel spectral correlation.
        coorD = np.corrcoef(D.transpose())
        w = Eigenvalues().apply(coorD)

        # Spectral edge: frequency at which the cumulative spectrum below
        # `tfreq` is closest to the fraction `ppow` of its maximum.
        tfreq = self.power_edge
        ppow = 0.5

        # top_freq = int(round(nt / sfreq * tfreq)) + 1
        top = int(round(nt / fs * tfreq))
        spedge = np.cumsum(D[:top, :], axis=0)
        spedge = np.argmin(np.abs(spedge - (spedge.max(axis=0) * ppow)), axis=0)
        spedge = spedge / top * tfreq

        feat.append(w)
        feat.append(spedge.ravel())

    if self.with_six:
        # Power in the bands delimited by `lvl`, plus the entropy over bands.
        lseg = np.round(nt / fs * lvl).astype('int')
        sixspect = np.zeros((len(lvl) - 1, nc))
        for j in range(len(sixspect)):
            sixspect[j, :] = 2 * np.sum(D[lseg[j]:lseg[j + 1], :], axis=0)
        spentropy = -1 * np.sum(np.multiply(sixspect, np.log(sixspect)), axis=0)
        feat.append(sixspect.ravel())
        feat.append(spentropy.ravel())

    if self.with_dy:
        # Power in dyadic bands: each level covers the upper half of the
        # remaining spectrum, halving `ldat` every step.
        ldat = int(floor(nt / 2.0))
        no_levels = int(floor(log(ldat, 2.0)))
        dspect = np.zeros((no_levels, nc))
        for j in range(no_levels - 1, -1, -1):
            dspect[j, :] = 2 * np.sum(D[int(floor(ldat / 2.0)):ldat, :], axis=0)
            ldat = int(floor(ldat / 2.0))
        spentropyDyd = -1 * np.sum(np.multiply(dspect, np.log(dspect)), axis=0)
        feat.append(dspect.ravel())
        feat.append(spentropyDyd.ravel())

    if self.with_mc:
        # NOTE(review): the numerator std has no axis argument, so it is a
        # single value over all channels, unlike the per-channel
        # denominator -- confirm whether axis=0 was intended.
        mobility = np.divide(
            np.std(np.diff(data, axis=0)),
            np.std(data, axis=0))
        complexity = np.divide(np.divide(
            # std of second derivative for each channel
            np.std(np.diff(np.diff(data, axis=0), axis=0), axis=0),
            # std of first derivative for each channel
            np.std(np.diff(data, axis=0), axis=0))
            , mobility)
        feat.append(mobility)
        feat.append(complexity)

    if self.with_time_corr:
        data1 = TimeCorrelation(self.max_hz, self.scale_option).apply(data.transpose())
        feat.append(data1)

    if self.with_equal_freq:
        data2 = FreqCorrelation(self.start, self.end, self.scale_option, self.resample_size, with_fft=True, with_corr=True).apply(data.transpose())
        feat.append(data2)

    if self.onlyfd_dfa:
        # Per-channel pyeeg.pfd / pyeeg.hfd values and pyeeg.dfa values.
        fd = np.zeros((2, nc))
        for j in range(nc):
            fd[0, j] = pyeeg.pfd(data[:, j])
            fd[1, j] = pyeeg.hfd(data[:, j], 3)

        DFA = np.zeros(nc)
        for j in range(nc):
            DFA[j] = pyeeg.dfa(data[:, j])

        # NOTE(review): this replaces the `feat` list (discarding anything
        # appended above) with a single 1-D array, which the concatenate
        # calls below then iterate element by element -- confirm intent.
        feat=np.concatenate((
            fd.ravel(),
            DFA.ravel(),
            np.sqrt(DFA).ravel(),
            np.square(DFA.ravel()),
            np.sqrt(fd).ravel(),
            np.square(fd).ravel(),
        ))

    if self.with_square or self.with_log or self.with_sqrt:
        # Element-wise transforms of the absolute value of everything
        # collected so far.
        tmp = np.concatenate(feat, axis=0)
        tmp = np.absolute(tmp)
        if self.with_square:
            feat.append(np.square(tmp))
        if self.with_log:
            feat.append(np.log(tmp))
        if self.with_sqrt:
            feat.append(np.sqrt(tmp))

    return np.concatenate(feat, axis=0)
def nms_fast(in_corners, H, W, dist_thresh):
    """
    Approximate grid-based Non-Max-Suppression for corners given as a 3xN
    numpy array [x_i, y_i, conf_i]^T.

    A HxW grid marks every (rounded) corner with 1. Corners are then visited
    from highest to lowest confidence: a corner that is still marked becomes
    -1 (kept) and every other cell within dist_thresh of it is cleared to 0.

    Grid values:
      -1 : kept.
       0 : empty or suppressed.
       1 : waiting to be processed.

    NOTE: points are rounded to integers first, so the effective distance is
    not exactly dist_thresh, and points are assumed to lie inside the image.

    Inputs
      in_corners - 3xN numpy array with corners [x_i, y_i, confidence_i]^T.
      H - Image height.
      W - Image width.
      dist_thresh - Suppression distance (infinity norm).
    Returns
      nmsed_corners - 3xN numpy matrix with surviving corners.
      nmsed_inds - N length numpy vector with surviving corner indices.
    """
    state = np.zeros((H, W)).astype(int)      # NMS bookkeeping grid
    slot_of = np.zeros((H, W)).astype(int)    # sorted-corner index per cell

    # Highest confidence first; pixel coordinates rounded to integers.
    order = np.argsort(-in_corners[2, :])
    ranked = in_corners[:, order]
    pix = ranked[:2, :].round().astype(int)
    n_pts = pix.shape[1]

    # Degenerate inputs: nothing to suppress.
    if n_pts == 0:
        return np.zeros((3, 0)).astype(int), np.zeros(0).astype(int)
    if n_pts == 1:
        single = np.vstack((pix, in_corners[2])).reshape(3, 1)
        return single, np.zeros((1)).astype(int)

    # Mark every corner and remember which sorted corner owns each cell.
    for k in range(n_pts):
        col, row = pix[0, k], pix[1, k]
        state[row, col] = 1
        slot_of[row, col] = k

    # Zero border so neighbourhoods near the edge stay inside the array.
    margin = dist_thresh
    state = np.pad(state, ((margin, margin), (margin, margin)), mode='constant')

    for px, py in pix.T:
        cx, cy = px + margin, py + margin     # shift for the added border
        if state[cy, cx] != 1:
            continue                          # already suppressed or kept
        state[cy - margin:cy + margin + 1, cx - margin:cx + margin + 1] = 0
        state[cy, cx] = -1

    # Collect survivors and order them by descending confidence.
    rows, cols = np.where(state == -1)
    rows, cols = rows - margin, cols - margin
    kept = slot_of[rows, cols]
    survivors = ranked[:, kept]
    by_conf = np.argsort(-survivors[-1, :])
    return survivors[:, by_conf], order[kept[by_conf]]
def padding(_img, padd):
    """Return the 2-D array `_img` surrounded by a zero border `padd` wide."""
    border = (padd, padd)
    return np.pad(_img, pad_width=[border, border], mode='constant',
                  constant_values=0)
def sde(self):
    """
    Support adding kernels for sde representation

    Calls ``sde()`` on every element of ``self.parts`` and combines the
    results: F, L, Qc, Pinf and P0 are joined block-diagonally, H is joined
    horizontally, and the 3-D derivative arrays dF, dQc, dPinf and dP0 are
    joined block-diagonally along all three axes (the last axis grows by
    each part's derivative count).

    Returns:
        Tuple ``(F, L, Qc, H, Pinf, P0, dF, dQc, dPinf, dP0)``.
    """
    import scipy.linalg as la

    def _append_block(acc, blk):
        """Zero-extend the 3-D array `acc` and put `blk` in the new far corner."""
        if acc is None:
            return blk
        grown = np.pad(acc,
                       ((0, blk.shape[0]), (0, blk.shape[1]), (0, blk.shape[2])),
                       'constant', constant_values=0)
        # Use explicit start offsets. A negative slice such as ``-0:`` would
        # select the whole axis when `blk` has a zero-length dimension
        # (e.g. a part without derivative entries).
        grown[acc.shape[0]:, acc.shape[1]:, acc.shape[2]:] = blk
        return grown

    F = None
    L = None
    Qc = None
    H = None
    Pinf = None
    P0 = None
    dF = None
    dQc = None
    dPinf = None
    dP0 = None
    n = 0    # accumulated rows of F
    nq = 0   # accumulated rows of Qc
    nd = 0   # accumulated size of the derivative axis

    # Assign models
    for p in self.parts:
        (Ft,Lt,Qct,Ht,Pinft,P0t,dFt,dQct,dPinft,dP0t) = p.sde()
        F = la.block_diag(F,Ft) if (F is not None) else Ft
        L = la.block_diag(L,Lt) if (L is not None) else Lt
        Qc = la.block_diag(Qc,Qct) if (Qc is not None) else Qct
        H = np.hstack((H,Ht)) if (H is not None) else Ht
        Pinf = la.block_diag(Pinf,Pinft) if (Pinf is not None) else Pinft
        P0 = la.block_diag(P0,P0t) if (P0 is not None) else P0t

        dF = _append_block(dF, dFt)
        dQc = _append_block(dQc, dQct)
        dPinf = _append_block(dPinf, dPinft)
        dP0 = _append_block(dP0, dP0t)

        n += Ft.shape[0]
        nq += Qct.shape[0]
        nd += dFt.shape[2]

    assert (F.shape[0] == n and F.shape[1]==n), "SDE add: Check of F Dimensions failed"
    assert (L.shape[0] == n and L.shape[1]==nq), "SDE add: Check of L Dimensions failed"
    assert (Qc.shape[0] == nq and Qc.shape[1]==nq), "SDE add: Check of Qc Dimensions failed"
    assert (H.shape[0] == 1 and H.shape[1]==n), "SDE add: Check of H Dimensions failed"
    assert (Pinf.shape[0] == n and Pinf.shape[1]==n), "SDE add: Check of Pinf Dimensions failed"
    assert (P0.shape[0] == n and P0.shape[1]==n), "SDE add: Check of P0 Dimensions failed"
    assert (dF.shape[0] == n and dF.shape[1]==n and dF.shape[2]==nd), "SDE add: Check of dF Dimensions failed"
    assert (dQc.shape[0] == nq and dQc.shape[1]==nq and dQc.shape[2]==nd), "SDE add: Check of dQc Dimensions failed"
    assert (dPinf.shape[0] == n and dPinf.shape[1]==n and dPinf.shape[2]==nd), "SDE add: Check of dPinf Dimensions failed"
    assert (dP0.shape[0] == n and dP0.shape[1]==n and dP0.shape[2]==nd), "SDE add: Check of dP0 Dimensions failed"

    return (F,L,Qc,H,Pinf,P0,dF,dQc,dPinf,dP0)
def dice_images(
    sliceSavePath,
    imageWidth,
    imageHeight,
    pixelPitch,
    x_boundries,
    y_boundries,
    overlap,
    progress_handle=None,
):
    """Cut every PNG in ``sliceSavePath`` into tiles and pad them to printer size.

    For each image the boundary lists are walked in order; a boundary equal
    to 0 is skipped and every other boundary closes a tile that starts at
    the previous boundary minus ``overlap``.  Each tile is zero-padded
    (centred) to ``printer.height`` x ``printer.width``, converted to 8-bit
    greyscale and written to ``<sliceSavePath>/diced_images``.  The source
    image is then moved to ``<sliceSavePath>/full_sized_images``.  Finally
    ``stitching_info.json`` is written with the parameters used.

    Args:
        sliceSavePath: directory holding the PNG slices (``str`` or ``Path``).
        imageWidth, imageHeight: accepted for interface compatibility; not
            read by this function.
        pixelPitch: stored verbatim in ``stitching_info.json``.
        x_boundries, y_boundries: tile boundaries in pixels along x / y.
        overlap: number of pixels by which neighbouring tiles overlap.
        progress_handle: optional callable invoked as
            ``progress_handle("Dicing images", done, total)`` after each image.

    NOTE(review): the pad target comes from a module-level ``printer``
    object, not from ``imageWidth``/``imageHeight`` -- confirm this is
    intended.  A tile larger than the printer yields a negative pad width,
    which ``np.pad`` rejects.
    """
    # Normalise once: the original mixed Path(sliceSavePath) with raw use of
    # the argument, so passing a plain string crashed on the "/" operator.
    sliceSavePath = Path(sliceSavePath)
    dicedSavePath = sliceSavePath / "diced_images"
    fullSavePath = sliceSavePath / "full_sized_images"
    dicedSavePath.mkdir(exist_ok=True)
    fullSavePath.mkdir(exist_ok=True)

    images = glob.glob(str(sliceSavePath / "*.png"))
    image_count = len(images)

    for i, image_path in enumerate(images):
        image_path = Path(image_path)
        filename = image_path.stem
        extention = image_path.suffix
        img = np.array(Image.open(image_path))

        last_x = 0
        for j, x in enumerate(x_boundries):
            x = int(x)
            if x == 0:
                continue
            last_y = 0
            for k, y in enumerate(y_boundries):
                y = int(y)
                if y == 0:
                    continue
                sub_img = img[last_y:y, last_x:x]

                # pad image (centred; any odd pixel goes to the bottom/right)
                img_width = x - last_x
                img_height = y - last_y
                pad_x = printer.width - img_width
                pad_y = printer.height - img_height
                sub_img = np.pad(
                    sub_img,
                    (
                        (pad_y // 2, pad_y - (pad_y // 2)),
                        (pad_x // 2, pad_x - (pad_x // 2)),
                    ),
                )
                sub_img = Image.fromarray(sub_img).convert("L")
                # j-1 / k-1: tile indices are relative to the skipped 0 boundary
                sub_img.save(
                    dicedSavePath / f"{filename}_stitch_x{j-1}_y{k-1}{extention}"
                )
                last_y = y - overlap
            last_x = x - overlap

        image_path.replace(fullSavePath / f"{filename}{extention}")
        if progress_handle is not None:
            progress_handle("Dicing images", i + 1, image_count)

    with open(sliceSavePath / "stitching_info.json", "w") as file:
        info = {
            "pixel_pitch": pixelPitch,
            "x_boundries": x_boundries,
            "y_boundries": y_boundries,
            "overlap": overlap,
        }
        json.dump(info, file)
# test_data_path = 'F:/data/private/speech/vctk/qts/p225_001.npy' # target_speaker_ivec = ivecs[speaker2id(target_speaker)] receptive_field = hp.dilations[-1] * hp.size speaker_emb = tf.placeholder(dtype=tf.float32, shape=(hp.batch_size, len(hp.speakers))) _input = tf.placeholder(dtype=tf.float32, shape=(hp.batch_size, receptive_field, hp.Q)) _z_q = tf.placeholder(dtype=tf.float32, shape=(hp.batch_size, receptive_field, hp.D)) x = decoder(_input, speaker_emb, _z_q, is_training=False) out = tf.multinomial(tf.squeeze(x, 1), num_samples=1, output_dtype=tf.int32) for j in range(0, len(test_data_paths)): test_qt = np.load(test_data_paths[j][0]) test_speaker = test_data_paths[j][1] x = np.pad(test_qt, ([0, hp.T], [0, 0]), mode="constant", constant_values=0)[:hp.T, :] x = np.expand_dims(x, 0) test_speaker = np.expand_dims(test_speaker, 0) # target_speaker_ivec = np.expand_dims(target_speaker_ivec,0) z_q, x, _speaker_emb = sess.run([g.z_q, g.encoder_inputs, g.speakers], feed_dict={ g.x: x, g.speaker_id: test_speaker }) #shape = (B,T,K) output = np.squeeze(x) inputs = x[:, :receptive_field, :] # (B,r,Q) # decode and get multinomial distribuition for i in tqdm(range(hp.T - receptive_field - 1), total=hp.T - receptive_field,
def zeropad_to_max_len(data, max_len=121):
    """Zero-pad the end of axis 1 of a 3-D array so that it has length ``max_len``.

    Axes 0 and 2 are left untouched.  ``np.pad`` rejects negative widths, so
    an input whose axis 1 is already longer than ``max_len`` raises.
    """
    missing = max_len - data.shape[1]
    widths = [(0, 0), (0, missing), (0, 0)]
    return np.pad(data, widths, mode="constant")
def load_umc_sheets(data_dir="/home/matthias/Data/umc_mozart", require_performance=False):
    """Load all pieces below ``data_dir`` as unwrapped sheets.

    Every sub-directory of ``data_dir`` is treated as one piece.  Its page
    images (``sheet/*.png``) are run through the OMR system detector, each
    detected system is cropped to ``SYSTEM_HEIGHT`` rows and the crops are
    concatenated horizontally into one long image per piece.

    Args:
        data_dir: root directory containing one folder per piece.
        require_performance: if True, pieces without any ``*performance*``
            entry in their folder are skipped.

    Returns:
        ``(piece_names, piece_paths, unwrapped_sheets)`` -- three parallel
        lists.  A piece is dropped entirely if system detection failed on
        any of its pages.
    """
    import glob
    import cv2

    # initialize omr system
    from omr.omr_app import OpticalMusicRecognizer
    from omr.utils.data import prepare_image
    from lasagne_wrapper.network import SegmentationNetwork
    from omr.models import system_detector, bar_detector

    net = system_detector.build_model()
    system_net = SegmentationNetwork(net, print_architecture=False)
    system_net.load('sheet_utils/omr_models/system_params.pkl')

    net = bar_detector.build_model()
    bar_net = SegmentationNetwork(net, print_architecture=False)
    bar_net.load('sheet_utils/omr_models/bar_params.pkl')

    piece_names = []
    unwrapped_sheets = []
    piece_paths = []

    # get list of all pieces
    piece_dirs = np.sort(glob.glob(os.path.join(data_dir, '*')))
    n_pieces = len(piece_dirs)

    # iterate pieces
    kept_pages = 0
    for i_piece, piece_dir in enumerate(piece_dirs):
        # basename instead of split('/') so the name is also right when the
        # platform's path separator is not "/"
        piece_name = os.path.basename(piece_dir)

        print(col.print_colored("Processing piece %d of %d (%s)" % (i_piece + 1, n_pieces, piece_name), col.OKBLUE))

        # check if there is a performance
        if require_performance and len(glob.glob(os.path.join(piece_dir, "*performance*"))) == 0:
            print("No performance found!")
            continue

        # load pages
        page_paths = np.sort(glob.glob(os.path.join(piece_dir, "sheet/*.png")))
        if len(page_paths) == 0:
            print("No sheet available!!!")
            continue

        unwrapped_sheet = np.zeros((SYSTEM_HEIGHT, 0), dtype=np.uint8)
        system_problem = False
        piece_pages = 0
        for i_page, page_path in enumerate(page_paths):
            piece_pages += 1

            # load sheet image (flag 0 = greyscale)
            I = cv2.imread(page_path, 0)

            # detect systems
            I_prep = prepare_image(I)
            omr = OpticalMusicRecognizer(note_detector=None, system_detector=system_net, bar_detector=bar_net)

            try:
                page_systems = omr.detect_systems(I_prep, verbose=False)
            except Exception:
                # Best effort: mark the whole piece as unusable but keep
                # going.  `Exception` (not a bare except) so that Ctrl-C and
                # SystemExit still propagate.
                print("Problem in system detection!!!")
                system_problem = True
                continue

            # unwrap sheet
            for system in page_systems:

                # centre a SYSTEM_HEIGHT window on the mean of two corner rows
                r0 = int(np.mean([system[0, 0], system[2, 0]])) - SYSTEM_HEIGHT // 2
                r1 = r0 + SYSTEM_HEIGHT
                c0 = int(system[0, 1])
                c1 = int(system[1, 1])

                # fix row slice coordinates
                r0 = max(0, r0)
                r1 = min(r1, I.shape[0])
                r0 = max(r0, r1 - SYSTEM_HEIGHT)

                staff_img = I[r0:r1, c0:c1].astype(np.uint8)

                if staff_img.shape[0] < SYSTEM_HEIGHT:
                    to_pad = SYSTEM_HEIGHT - staff_img.shape[0]
                    # more than 10% missing rows: skip this system
                    if to_pad > (0.1 * SYSTEM_HEIGHT):
                        print("Problem in system padding!!!")
                        continue
                    staff_img = np.pad(staff_img, ((0, to_pad), (0, 0)), mode="edge")

                unwrapped_sheet = np.hstack((unwrapped_sheet, staff_img))

        if not system_problem:
            piece_names.append(piece_name)
            piece_paths.append(piece_dir)
            unwrapped_sheets.append(unwrapped_sheet)
            # count pages only for pieces that are actually returned, so the
            # summary below matches the returned data
            kept_pages += piece_pages

    print("%d pieces covering %d pages of sheet music." % (len(piece_names), kept_pages))

    return piece_names, piece_paths, unwrapped_sheets
def maux(output_text, num):
    """Synthesize ``output_text`` in a cloned voice and save it as a WAV file.

    Loads the encoder, synthesizer and vocoder from hard-coded paths, runs a
    short self-test of all three, embeds a hard-coded reference recording,
    synthesizes ``output_text`` with that voice and writes the result to a
    hard-coded media directory.

    Args:
        output_text: the text to be spoken.
        num: integer used to number the output file (``demo_output_%02d``).

    Returns:
        The output file name without directory or extension.

    The function terminates the process via ``quit(-1)`` when CUDA is not
    available.
    """
    print("debug -- django")
    ## Info & args
    # The command-line interface of the original demo script (encoder /
    # synthesizer / vocoder paths, --low_mem, --no_sound) is disabled here;
    # the model paths are hard-coded below instead.

    ## Print some environment information (for debugging purposes)
    print("Running a test of your configuration...\n")
    if not torch.cuda.is_available():
        print(
            "Your PyTorch installation is not configured to use CUDA. If you have a GPU ready "
            "for deep learning, ensure that the drivers are properly installed, and that your "
            "CUDA version matches your PyTorch installation. CPU-only inference is currently "
            "not supported.",
            file=sys.stderr)
        quit(-1)
    device_id = torch.cuda.current_device()
    gpu_properties = torch.cuda.get_device_properties(device_id)
    print(
        "Found %d GPUs available. Using GPU %d (%s) of compute capability %d.%d with "
        "%.1fGb total memory.\n" %
        (torch.cuda.device_count(), device_id, gpu_properties.name,
         gpu_properties.major, gpu_properties.minor,
         gpu_properties.total_memory / 1e9))

    ## Load the models one by one.
    print("Preparing the encoder, the synthesizer and the vocoder...")
    encoder.load_model(
        "D:/RemindMe/django-remindme/mysite/trained model/encoder/saved_models/pretrained.pt"
    )
    synthesizer = Synthesizer(
        "D:/RemindMe/django-remindme/mysite/trained model/synthesizer/saved_models/logs-pretrained/taco_pretrained",
        low_mem=False)
    vocoder.load_model(
        "D:/RemindMe/django-remindme/mysite/trained model/vocoder/saved_models/pretrained/pretrained.pt"
    )

    ## Run a test
    print("Testing your configuration with small inputs.")
    # Forward an audio waveform of zeroes that lasts 1 second. Notice how we can get the encoder's
    # sampling rate, which may differ.
    # If you're unfamiliar with digital audio, know that it is encoded as an array of floats
    # (or sometimes integers, but mostly floats in this project) ranging from -1 to 1.
    # The sampling rate is the number of values (samples) recorded per second. Creating an array
    # of length <sampling_rate> will always correspond to an audio of 1 second.
    print("\tTesting the encoder...")
    encoder.embed_utterance(np.zeros(encoder.sampling_rate))

    # Create a dummy embedding. You would normally use the embedding that encoder.embed_utterance
    # returns, but here we're going to make one ourselves just for the sake of showing that it's
    # possible.
    embed = np.random.rand(speaker_embedding_size)
    # Embeddings are L2-normalized (this isn't important here, but if you want to make your own
    # embeddings it will be).
    embed /= np.linalg.norm(embed)
    # The synthesizer can handle multiple inputs with batching. Let's create another embedding to
    # illustrate that
    embeds = [embed, np.zeros(speaker_embedding_size)]
    texts = ["test 1", "test 2"]
    print(
        "\tTesting the synthesizer... (loading the model will output a lot of text)"
    )
    mels = synthesizer.synthesize_spectrograms(texts, embeds)

    # The vocoder synthesizes one waveform at a time, but it's more efficient for long ones. We
    # can concatenate the mel spectrograms to a single one.
    mel = np.concatenate(mels, axis=1)
    # The vocoder can take a callback function to display the generation. Here it is silenced
    # with a no-op:
    no_action = lambda *args: None
    print("\tTesting the vocoder...")
    # For the sake of making this test short, we'll pass a short target length. The target length
    # is the length of the wav segments that are processed in parallel. E.g. for audio sampled
    # at 16000 Hertz, a target length of 8000 means that the target audio will be cut in chunks of
    # 0.5 seconds which will all be generated together. The parameters here are absurdly short, and
    # that has a detrimental effect on the quality of the audio. The default parameters are
    # recommended in general.
    vocoder.infer_waveform(mel,
                           target=200,
                           overlap=50,
                           progress_callback=no_action)

    print("All test passed! You can now synthesize speech.\n\n")

    ## Speech generation
    print(
        "This is a GUI-less example of interface to SV2TTS. The purpose of this script is to "
        "show how you can interface this project easily with your own. See the source code for "
        "an explanation of what is happening.\n")

    print("Interactive generation loop")

    # Reference recording whose voice is cloned.
    in_fpath = Path(
        "D:/RemindMe/django-remindme/mysite/trained model/sam_narration2.wav")

    # NOTE(review): the wav is preprocessed twice -- once straight from the
    # path and once from the librosa-loaded samples; only the second result
    # is used.  The first call looks redundant -- confirm before removing.
    preprocessed_wav = encoder.preprocess_wav(in_fpath)
    original_wav, sampling_rate = librosa.load(in_fpath)
    preprocessed_wav = encoder.preprocess_wav(original_wav, sampling_rate)
    print("Loaded file succesfully")

    embed = encoder.embed_utterance(preprocessed_wav)
    print("Created the embedding")

    # The synthesizer works on batches, hence the single-element lists.
    embeds = [embed]
    text = output_text
    texts = [text]

    specs = synthesizer.synthesize_spectrograms(texts, embeds)
    spec = specs[0]
    print("Created the mel spectrogram")

    ## Generating the waveform
    print("Synthesizing the waveform:")
    # Synthesizing the waveform is fairly straightforward. Remember that the longer the
    # spectrogram, the more time-efficient the vocoder.
    generated_wav = vocoder.infer_waveform(spec)

    ## Post-generation
    # There's a bug with sounddevice that makes the audio cut one second earlier, so we
    # pad it (one second = synthesizer.sample_rate samples of silence at the end).
    generated_wav = np.pad(generated_wav, (0, synthesizer.sample_rate),
                           mode="constant")

    # Save it on the disk
    # NOTE(review): this directory ("django_remindme_model") differs from the
    # "django-remindme" tree the models are loaded from -- confirm it is intended.
    filexpath = "D:/RemindMe/django_remindme_model/mysite/media/demo_output_%02d.wav" % num
    fx = "demo_output_%02d" % num
    print(generated_wav.dtype)
    # NOTE(review): the `librosa.output` module is, as far as I know, absent
    # from newer librosa releases -- verify against the pinned version.
    librosa.output.write_wav(filexpath, generated_wav.astype(np.float32),
                             synthesizer.sample_rate)
    print("\nSaved output as %s\n\n" % filexpath)
    return fx
def append_tile(array, geom, tot_array, tot_geom):
    """
    Blend one tile of projections into the larger, stitched projection stack.

    The tile is zero-padded to the size of ``tot_array``, shifted to its
    position and merged with the existing data using weights derived from a
    distance transform of each data set's footprint.  ``tot_array`` is
    modified in place; nothing is returned.

    Args:
        array: projection stack of the tile (axis 1 indexes projections)
        geom: geometry description of the tile
        tot_array: output array (updated in place)
        tot_geom: output geometry

    Raises:
        Exception: if detector pixel sizes or projection counts differ.
    """
    print('Stitching a tile...')

    # Axes 0 and 2 are used as the detector axes, axis 1 as the projection index
    det_shape = array.shape[::2]
    total_shape = tot_array.shape[::2]

    pixel_mismatch = numpy.abs(tot_geom['det_pixel'] - geom['det_pixel'])
    if pixel_mismatch > 1e-6:
        raise Exception('This array has different detector pixels! %f v.s. %f. Aborting!' % (geom['det_pixel'], tot_geom['det_pixel']))

    if tot_array.shape[1] != array.shape[1]:
        raise Exception('This array has different number of projections from the others. %u v.s. %u. Aborting!' % (array.shape[1], tot_array.shape[1]))

    total_size = tot_geom.detector_size(total_shape)
    det_size = geom.detector_size(det_shape)

    # Offset of the tile from the left top corner of the total detector, in pixels:
    y0, x0 = tot_geom.detector_centre()
    y, x = geom.detector_centre()

    x_offset = ((x - x0) + total_size[1] / 2 - det_size[1] / 2) / geom.pixel[1]
    y_offset = ((y - y0) + total_size[0] / 2 - det_size[0] / 2) / geom.pixel[0]

    # Snap to whole pixels
    x_offset = int(numpy.round(x_offset))
    y_offset = int(numpy.round(y_offset))

    # Amount of padding that brings the tile to the size of the total array:
    pad_x = tot_array.shape[2] - array.shape[2]
    pad_y = tot_array.shape[0] - array.shape[0]

    # Refine the nominal offset with the residual shift found from the data
    residual = _find_shift_(tot_array, array, [y_offset, x_offset])
    x_offset += residual[1]
    y_offset += residual[0]

    # Footprints: where the total array already has data (sampled from every
    # 100th projection) and where the shifted tile will land
    base0 = (tot_array[:, ::100, :].mean(1)) != 0

    new0 = numpy.zeros_like(base0)
    new0[:det_shape[0], :det_shape[1]] = 1.0
    new0 = interp.shift(new0, [y_offset, x_offset], order = 1)

    # Weights = distance to the footprint edge, trimmed by one pixel to
    # avoid interpolation errors and clipped at zero
    base_dist = ndimage.distance_transform_bf(base0)
    base_dist -= 1
    base_dist *= base_dist > 0

    new_dist = ndimage.distance_transform_bf(new0)
    new_dist -= 1
    new_dist *= new_dist > 0

    norm = (base_dist + new_dist)
    # where neither data set contributes, divide by infinity to get zero
    norm[norm == 0] = numpy.inf

    time.sleep(0.5)

    needs_shift = (x_offset != 0) | (y_offset != 0)

    # Blend projection by projection:
    for ii in tqdm(range(tot_array.shape[1]), unit='img'):

        # Pad to match sizes:
        new = numpy.pad(array[:, ii, :], ((0, pad_y), (0, pad_x)), mode = 'constant')

        # Move the tile to its place:
        if needs_shift:
            new = interp.shift(new, [y_offset, x_offset], order = 1)

        # Distance-weighted average of existing and new data:
        base = tot_array[:, ii, :]
        tot_array[:, ii, :] = ((base_dist * base) + (new_dist * new)) / norm
def _process_utterance(out_dir, index, wav_path, text):
    """Preprocess one utterance and store its audio and mel features as .npy files.

    The waveform is optionally rescaled, encoded according to
    ``hparams.input_type`` (mu-law quantized, mu-law, or raw), padded to
    match the spectrogram framing and cut to a whole number of hops.

    Args:
        out_dir: directory the two .npy files are written to.
        index: number used in the output file names.
        wav_path: path of the audio file to load.
        text: transcript, passed through unchanged.

    Returns:
        ``(audio_filename, mel_filename, timesteps, text)``
    """
    wav = audio.load_wav(wav_path)

    if hparams.rescaling:
        wav = wav / np.abs(wav).max() * hparams.rescaling_max

    if is_mulaw_quantize(hparams.input_type):
        # integer classes in [0, quantize_channels)
        signal = P.mulaw_quantize(wav, hparams.quantize_channels)

        # silence is trimmed on the quantized signal; apply the same cut to both
        start, end = audio.start_and_end_indices(signal, hparams.silence_threshold)
        wav = wav[start:end]
        signal = signal[start:end]

        pad_value = P.mulaw_quantize(0, hparams.quantize_channels)
        save_dtype = np.int16
    elif is_mulaw(hparams.input_type):
        # continuous mu-law in [-1, 1]
        signal = P.mulaw(wav, hparams.quantize_channels)
        pad_value = P.mulaw(0.0, hparams.quantize_channels)
        save_dtype = np.float32
    else:
        # raw waveform in [-1, 1]
        signal = wav
        pad_value = 0.0
        save_dtype = np.float32

    # Mel-scale spectrogram of the (possibly trimmed) wav, transposed to (N, D)
    mel_spectrogram = audio.melspectrogram(wav).astype(np.float32).T

    # lws pads internally before the stft; pad the signal the same way
    # (with the encoding of silence) so audio and mel frames stay aligned
    left, right = audio.lws_pad_lr(wav, hparams.fft_size, audio.get_hop_size())
    signal = np.pad(signal, (left, right), mode="constant", constant_values=pad_value)

    n_frames = mel_spectrogram.shape[0]
    assert len(signal) >= n_frames * audio.get_hop_size()

    # keep exactly hop_size samples per frame so that transposed convolution
    # can be used for upsampling
    signal = signal[:n_frames * audio.get_hop_size()]
    assert len(signal) % audio.get_hop_size() == 0

    timesteps = len(signal)

    audio_filename = 'ljspeech-audio-%05d.npy' % index
    mel_filename = 'ljspeech-mel-%05d.npy' % index

    audio_path = os.path.join(out_dir, audio_filename)
    np.save(audio_path, signal.astype(save_dtype), allow_pickle=False)

    mel_path = os.path.join(out_dir, mel_filename)
    np.save(mel_path, mel_spectrogram.astype(np.float32), allow_pickle=False)

    return (audio_filename, mel_filename, timesteps, text)
def resize_image(image, min_dim=None, max_dim=None, min_scale=None, mode="square"):
    """Resize an image, keeping its aspect ratio, then pad or crop it.

    min_dim: if given, the image is scaled up (never down) so that its
        shorter side is at least min_dim.
    max_dim: in "square" mode, the longest side is not allowed to exceed
        this value; it is also the side length of the padded output.
    min_scale: if given, the scale factor is at least this value.
    mode: one of
        none:   return the image unchanged.
        square: resize, then zero-pad to [max_dim, max_dim].
        pad64:  resize by min_dim/min_scale, then zero-pad height and width
                up to the next multiple of 64. min_dim must itself be a
                multiple of 64. max_dim is not used.
        crop:   resize by min_dim/min_scale, then take a random
                min_dim x min_dim crop. max_dim is not used.

    Returns:
    image: the processed image, cast back to the input dtype
    window: (y1, x1, y2, x2) location of the real image inside the returned
        image (x2, y2 exclusive)
    scale: the scale factor that was applied
    padding: [(top, bottom), (left, right), (0, 0)]
    crop: (y, x, height, width) of the crop in "crop" mode, otherwise None

    Raises:
    Exception: for an unknown mode (raised after any resizing was done).
    """
    def _halves(total):
        # split a pad amount in two; the second half takes the odd pixel
        first = total // 2
        return first, total - first

    # The result is cast back to this dtype at the end
    source_dtype = image.dtype

    h, w = image.shape[:2]
    window = (0, 0, h, w)
    scale = 1
    padding = [(0, 0), (0, 0), (0, 0)]
    crop = None

    if mode == "none":
        return image, window, scale, padding, crop

    # Work out the scale factor
    if min_dim:
        # only ever enlarge
        scale = max(1, min_dim / min(h, w))
    if min_scale and scale < min_scale:
        scale = min_scale

    if max_dim and mode == "square":
        longest = max(h, w)
        if round(longest * scale) > max_dim:
            scale = max_dim / longest

    # Bilinear resize, only when something changes
    if scale != 1:
        image = resize(image, (round(h * scale), round(w * scale)),
                       preserve_range=True)

    if mode == "crop":
        h, w = image.shape[:2]
        y = random.randint(0, (h - min_dim))
        x = random.randint(0, (w - min_dim))
        crop = (y, x, min_dim, min_dim)
        image = image[y:y + min_dim, x:x + min_dim]
        window = (0, 0, min_dim, min_dim)
    elif mode in ("square", "pad64"):
        h, w = image.shape[:2]
        if mode == "square":
            top_pad, bottom_pad = _halves(max_dim - h)
            left_pad, right_pad = _halves(max_dim - w)
        else:
            assert min_dim % 64 == 0, "Minimum dimension must be a multiple of 64"
            # (-n) % 64 is the distance from n up to the next multiple of 64
            top_pad, bottom_pad = _halves((-h) % 64)
            left_pad, right_pad = _halves((-w) % 64)
        padding = [(top_pad, bottom_pad), (left_pad, right_pad), (0, 0)]
        image = np.pad(image, padding, mode='constant', constant_values=0)
        window = (top_pad, left_pad, h + top_pad, w + left_pad)
    else:
        raise Exception("Mode {} not supported".format(mode))

    return image.astype(source_dtype), window, scale, padding, crop
def complete(self, comppars, X, parfile=None, sigma=0.0):
    """
    Finds the best representation that can complete any missing part of the
    ASPA code.

    The latent vectors ``zhats`` are optimised for ``nIter`` iterations so
    that the generator output matches the unmasked part of the input.
    Every ``outInterval`` iterations the predicted parameters, histograms,
    images and spectra are written below ``comppars['outDir']``.

    Input:
        comppars: dict of completion settings. Keys read here: outDir,
            maskType, centerScale, nIter, outInterval, approach ('adam' or
            'hmc'), make_corner, beta1, beta2, lr, eps, hmcL, hmcEps,
            hmcBeta, hmcAnneal, checkpointDir.
        X: any spectrum correctly converted into an ASPA code. A ``str`` is
            handled as a synthetic spectrum whose true parameters are read
            from ``parfile``; any other non-ndarray value is indexed as
            ``X['param'][...]`` for the ground truth.
        parfile: parameter file, used only when ``X`` is a ``str``.
        sigma: passed on to ``get_test_image``.

    Nothing is returned; all results are written to disk.
    """
    outDir = directory(comppars['outDir'])
    maskType = comppars['maskType']
    centerScale = comppars['centerScale']
    nIter = int(comppars['nIter'])
    outInterval = int(comppars['outInterval'])
    approach = comppars['approach']
    make_corner = bool(comppars['make_corner'])
    beta1 = comppars['beta1']
    beta2 = comppars['beta2']
    lr = comppars['lr']
    eps = comppars['eps']
    hmcL = int(comppars['hmcL'])
    hmcEps = comppars['hmcEps']
    hmcBeta = comppars['hmcBeta']
    hmcAnneal = comppars['hmcAnneal']
    checkpoint_dir = directory(comppars['checkpointDir'])

    build_directories(comppars)

    # Ground-truth source: unavailable when only a bare array was given.
    # (The original wrapped this in a try/except IOError that could never
    # trigger, and built an unused copy of X.)
    true_spectrum = None if type(X) is np.ndarray else X

    with self.sess.as_default():
        try:
            tf.global_variables_initializer().run()
        except Exception:
            # fall back to the older initializer name
            tf.initialize_all_variables().run()

        isLoaded = self.load(checkpoint_dir)
        assert (isLoaded)

        grids = Grids()
        wnw_grid = grids.wnw_grid

        nImgs = self.batch_size

        batch_idxs = int(np.ceil(nImgs / self.batch_size))

        # Build the mask: 1 = pixel is known, 0 = pixel has to be completed
        if maskType == 'random':
            fraction_masked = 0.2
            mask = np.ones(self.image_shape)
            mask[np.random.random(self.image_shape[:2]) < fraction_masked] = 0.0
        elif maskType == 'center':
            assert (centerScale <= 0.5)
            mask = np.ones(self.image_shape)
            l = int(self.image_size * centerScale)
            u = int(self.image_size * (1.0 - centerScale))
            mask[l:u, l:u, :] = 0.0
        elif maskType == 'left':
            mask = np.ones(self.image_shape)
            c = self.image_size // 2
            mask[:, :c, :] = 0.0
        elif maskType == 'full':
            mask = np.ones(self.image_shape)
        elif maskType == 'grid':
            mask = np.zeros(self.image_shape)
            mask[::4, ::4, :] = 1.0
        elif maskType == 'lowres':
            mask = np.zeros(self.image_shape)
        elif maskType == 'parameters':
            assert (centerScale <= 0.5)
            mask = np.ones(self.image_shape)
            mask[-3:, :, :] = 0.0
            mask[:, -3:, :] = 0.0
            mask[-10:, -10:, :] = 0.0
        elif maskType == 'wfc3':
            assert (centerScale <= 0.5)
            m_size = self.image_size - 10
            mask = np.ones(self.image_shape)
            # keep only flattened spectral bins 334..383
            fake_spec = np.ones(m_size**2)
            fake_spec[:334] = 0.0
            fake_spec[384:] = 0.0
            fake_spec = fake_spec.reshape((m_size, m_size))
            mask[:m_size, :m_size, 0] = fake_spec
            mask[-8:, :, :] = 0.0
            mask[:, -10:, :] = 0.0
            mask[-10:, -10:, :] = 0.0
        else:
            assert (False)

        for idx in xrange(0, batch_idxs):
            l = idx * self.batch_size
            u = min((idx + 1) * self.batch_size, nImgs)
            batchSz = u - l

            if type(X) != str:
                Xtrue = get_spectral_matrix(X, size=self.image_size - 10)
                Xt = get_test_image(X, sigma=sigma, size=self.image_size,
                                    batch_size=self.batch_size)
            else:
                Xtrue = get_spectral_matrix(X, parfile=parfile,
                                            size=self.image_size - 10)
                Xt = get_test_image(X, sigma=sigma, size=self.image_size,
                                    batch_size=self.batch_size, parfile=parfile)

            # result unused here; call kept in case it has side effects
            spec_parameters = get_parameters(Xtrue, size=self.image_size)

            batch = Xt
            batch_images = np.array(batch).astype(np.float32)
            if batchSz < self.batch_size:
                print(batchSz)
                padSz = ((0, int(self.batch_size - batchSz)), (0, 0), (0, 0), (0, 0))
                batch_images = np.pad(batch_images, padSz, 'constant')
                batch_images = batch_images.astype(np.float32)

            zhats = np.random.uniform(-1, 1, size=(self.batch_size, self.z_dim))
            # Adam first/second moment estimates
            m = 0
            v = 0

            nImgs = 1

            plt.imsave(os.path.join(outDir, 'before.png'),
                       Xtrue[:, :, 0], cmap='gist_gray', format='png')
            plt.close()
            resize(os.path.join(outDir, 'before.png'))

            masked_images = np.multiply(batch_images, mask)
            plt.imsave(os.path.join(outDir, 'masked.png'),
                       masked_images[0, :, :, 0], cmap='gist_gray', format='png')
            plt.close()
            resize(os.path.join(outDir, 'masked.png'))

            # one log per image: header line first, one row per iteration later
            for img in range(batchSz):
                with open(os.path.join(outDir, 'logs/hats_{:02d}.log'.format(img)), 'a') as f:
                    f.write('iter loss ' +
                            ' '.join(['z{}'.format(zi) for zi in range(self.z_dim)]) +
                            '\n')

            for i in xrange(nIter):
                fd = {
                    self.z: zhats,
                    self.mask: mask,
                    self.images: batch_images,
                    self.is_training: False
                }
                run = [self.complete_loss, self.grad_complete_loss, self.G]
                loss, g, G_imgs = self.sess.run(run, feed_dict=fd)

                for img in range(batchSz):
                    with open(os.path.join(outDir, 'logs/hats_{:02d}.log'.format(img)), 'ab') as f:
                        f.write('{} {} '.format(i, loss[img]).encode())
                        np.savetxt(f, zhats[img:img + 1])

                if i % outInterval == 0:
                    ranges = []
                    ground_truths = []
                    gan_avg = []
                    gan_p_err = []
                    gan_m_err = []

                    if type(X) == str:
                        # If the input spectrum is synthetic, you know the
                        # parameters array and you want to compare the real
                        # value with the retrieved one. If your spectrum does
                        # not contain a molecule, the default value is fixed
                        # to -7.9
                        pp = ParameterParser()
                        pp.read(parfile)
                        real_pars = pp.full_dict()
                        real_tp = float(real_pars['Atmosphere']['tp_iso_temp'])
                        real_rp = float(real_pars['Planet']['radius'])
                        real_mp = float(real_pars['Planet']['mass'])
                        atm_active_gases = np.array([
                            gas.upper()
                            for gas in real_pars['Atmosphere']['active_gases']
                        ])
                        atm_active_gases_mixratios = np.array(
                            real_pars['Atmosphere']['active_gases_mixratios'])
                        real_mol = check_molecule_existence(
                            ['CO', 'CO2', 'H2O', 'CH4'],
                            atm_active_gases_mixratios,
                            atm_active_gases,
                            default=-7.9)
                        ground_truths = np.array(real_mol + [real_rp, real_mp, real_tp])

                    elif true_spectrum is not None and type(X) != str:
                        h2o = np.log10(true_spectrum['param']['h2o_mixratio'])
                        ch4 = np.log10(true_spectrum['param']['ch4_mixratio'])
                        co2 = np.log10(true_spectrum['param']['co2_mixratio'])
                        co = np.log10(true_spectrum['param']['co_mixratio'])
                        rp = true_spectrum['param']['planet_radius'] / RJUP
                        mp = true_spectrum['param']['planet_mass'] / MJUP
                        tp = true_spectrum['param']['temperature_profile']
                        ground_truths = np.array([co, co2, h2o, ch4, rp, mp, tp])
                        real_mol = np.zeros(4)

                    else:
                        ground_truths = np.array([None] * 7)
                        real_mol = np.zeros(4)

                    parameters = ['CO', 'CO2', 'H2O', 'CH4', 'Rp', 'Mp', 'Tp']
                    # raw strings: same text as before, without relying on
                    # the invalid "\l" escape
                    labels = [
                        r'$\log{CO}$', r'$\log{CO_2}$', r'$\log{H_2O}$',
                        r'$\log{CH_4}$', '$R_p (R_j)$', '$M_p (M_j)$', '$T_p$'
                    ]

                    all_hists = []
                    prediction_file = open(
                        outDir + 'predictions/prediction_{:04d}.txt'.format(i), 'w')
                    try:
                        for mol in parameters:
                            prediction_file, gan_avg, gan_p_err, gan_m_err, ranges, all_hists = \
                                histogram_par(mol, G_imgs, batchSz, self.image_size,
                                              ground_truths, all_hists, prediction_file,
                                              gan_avg, gan_p_err, gan_m_err, ranges)
                    finally:
                        # the file was never closed before; close it once all
                        # parameters have been written
                        prediction_file.close()

                    all_hists = np.array(all_hists).T

                    if make_corner:
                        make_corner_plot(all_hists, ranges, labels,
                                         ground_truths, comppars, i)

                    # Plot histograms: 2 x 4 grid, parameters 0-3 in the
                    # first row, 4-6 in the second; last panel stays empty
                    hist_dict = {}
                    f, ax = plt.subplots(2, 4, figsize=(21, 15))
                    all_hists = all_hists.T
                    ii = 0
                    jj = 0
                    for his in range(len(all_hists)):
                        if his == 4:
                            ii = 1
                            jj = 4
                        hist_dict[labels[his]] = {}
                        weights = np.ones_like(all_hists[his]) / float(len(all_hists[his]))
                        hist_dict[labels[his]]['histogram'] = all_hists[his]
                        hist_dict[labels[his]]['weights'] = weights
                        hist_dict[labels[his]]['bins'] = ranges[his]
                        panel = ax[ii, his - jj]
                        panel.hist(all_hists[his],
                                   bins=np.linspace(min(ranges[his]), max(ranges[his]), 20),
                                   color='firebrick',
                                   weights=weights)
                        panel.set_xlim(min(ranges[his]), max(ranges[his]))
                        panel.axvline(gan_avg[his], c='g', label='ExoGAN mean')
                        panel.axvline(ground_truths[his], c='b', label='Input value')
                        panel.set_xlabel(labels[his] +
                                         ' = $%1.2f_{-%1.2f}^{%1.2f}$' %
                                         (gan_avg[his], gan_m_err[his], gan_p_err[his]))
                        if his == 3:
                            panel.legend()
                        panel.axvline(gan_avg[his] + gan_p_err[his], c='k', linestyle='--')
                        panel.axvline(gan_avg[his] - gan_m_err[his], c='k', linestyle='--')
                    ax[-1, -1].axis('off')
                    plt.subplots_adjust(right=1.2)
                    histName = os.path.join(
                        outDir, 'histograms/all_par/{:04d}.pdf'.format(i))
                    plt.savefig(histName, bbox_inches='tight')
                    plt.close()

                    histpickle = os.path.join(
                        outDir, 'histograms/all_par/histogram.pickle')
                    with open(histpickle, 'wb') as fp:
                        pickle.dump(hist_dict, fp)

                    # Pick the generated image whose spectrum is closest
                    # (chi-square over the first 440 bins) to the input
                    real_spec = Xtrue[:self.image_size, :self.image_size, :]
                    real_spec = real_spec[:23, :23, 0].flatten()

                    chi_square = []
                    spectra = []
                    f, ax = plt.subplots(sharey=True, figsize=(12, 6))
                    for k in range(batchSz):
                        spectrum = G_imgs[k, :self.image_size, :self.image_size, :]
                        spectrum = spectrum[:23, :23, 0].flatten()
                        spectra.append(spectrum)
                        chi_square.append(
                            chisquare(spectrum[:440], f_exp=real_spec[:440])[0])
                    best_ind = chi_square.index(min(chi_square))

                    print(i, np.mean(loss[0:batchSz]))
                    imgName = os.path.join(outDir, 'hats_imgs/{:04d}.png'.format(i))
                    plt.imsave(imgName, G_imgs[best_ind, :, :, 0],
                               cmap='gist_gray', format='png')
                    plt.close()
                    resize(imgName)

                    # known pixels from the input, masked pixels from the generator
                    inv_masked_hat_images = np.multiply(G_imgs, 1.0 - mask)
                    completed = masked_images + inv_masked_hat_images
                    imgName = os.path.join(outDir, 'completed/{:04d}.png'.format(i))
                    plt.imsave(imgName, completed[best_ind, :, :, 0],
                               cmap='gist_gray', format='png')
                    plt.close()
                    resize(imgName)

                    # NOTE(review): the three names tested below are the
                    # plotting callables themselves, so these conditions
                    # look always-true -- confirm whether config flags were
                    # intended.
                    if spectra_int_norm:
                        # Compare real spectrum with the generated one
                        spectra_int_norm(Xtrue, self.image_size, wnw_grid,
                                         batchSz, G_imgs, comppars, i)

                    if spectra_norm:
                        # Compare spectra with original normalisation between 0 and 1
                        spectra_norm(Xtrue, self.image_size, wnw_grid,
                                     batchSz, G_imgs, comppars, i)

                    if spectra_real_norm:
                        # Compare spectra with the normalisation factor from the real spectrum
                        spectra_real_norm(Xtrue, self.image_size, wnw_grid,
                                          batchSz, G_imgs, comppars, i)

                if approach == 'adam':
                    # Optimize single completion with Adam
                    m_prev = np.copy(m)
                    v_prev = np.copy(v)
                    m = beta1 * m_prev + (1 - beta1) * g[0]
                    v = beta2 * v_prev + (1 - beta2) * np.multiply(g[0], g[0])
                    m_hat = m / (1 - beta1**(i + 1))
                    v_hat = v / (1 - beta2**(i + 1))
                    zhats += -np.true_divide(lr * m_hat, (np.sqrt(v_hat) + eps))
                    zhats = np.clip(zhats, -1, 1)

                elif approach == 'hmc':
                    # Sample example completions with HMC (not in paper)
                    zhats_old = np.copy(zhats)
                    loss_old = np.copy(loss)
                    v = np.random.randn(self.batch_size, self.z_dim)
                    v_old = np.copy(v)

                    for steps in range(hmcL):
                        v -= hmcEps / 2 * hmcBeta * g[0]
                        zhats += hmcEps * v
                        # in-place, so that fd[self.z] sees the update
                        np.copyto(zhats, np.clip(zhats, -1, 1))
                        # `run` has three fetches; the original unpacked
                        # four values here and raised ValueError
                        loss, g, _ = self.sess.run(run, feed_dict=fd)
                        v -= hmcEps / 2 * hmcBeta * g[0]

                    # accept/reject each proposal; rejected ones are rolled back
                    for img in range(batchSz):
                        logprob_old = hmcBeta * loss_old[img] + np.sum(v_old[img]**2) / 2
                        logprob = hmcBeta * loss[img] + np.sum(v[img]**2) / 2
                        accept = np.exp(logprob_old - logprob)
                        if accept < 1 and np.random.uniform() > accept:
                            np.copyto(zhats[img], zhats_old[img])

                    hmcBeta *= hmcAnneal

                else:
                    assert (False)
for each in original_features: sm = [] for beach in cover_features: sm.append(euclidean(each, beach)) similarity_matrix.append(sm) return np.array(similarity_matrix) model = load_model(sys.argv[1]) original_song = sys.argv[2] cover_song = sys.argv[3] original_signal = librosa.load(original_song)[0] cover_signal = librosa.load(cover_song)[0] original_features = extract_features(original_signal) cover_features = extract_features(cover_signal) oti_cover_features = oti_func(original_features, cover_features) mat = sim_matrix(original_features, oti_cover_features) if mat.shape[0] < 180: mat = np.pad(mat, ((0,180 - mat.shape[0]),(0,0)), mode = 'constant', constant_values=0) if mat.shape[1] < 180: mat = np.pad(mat, ((0,0),(0,180 - mat.shape[1])), mode = 'constant', constant_values=0) ans = model.predict(mat.reshape(1,180,180,1)) if ans[0][0] < ans[0][1]: print("The song is a cover pair with probability of : {}".format(ans[0][1])) else: print("The song is not a cover pair with probability of : {}".format(ans[0][0]))
def coarseness(image, voxelspacing = None, mask = slice(None)):
    r"""
    Takes a simple or multi-spectral image and returns the coarseness of the texture.

    Step1  At each pixel, compute six averages for the windows of size 2**k x 2**k,
           k=0,1,...,5, around the pixel.
    Step2  At each pixel, compute absolute differences E between the pairs of non
           overlapping averages in every directions.
    Step3  At each pixel, find the value of k that maximises the difference Ek in either
           direction and set the best size Sbest=2**k
    Step4  Compute the coarseness feature Fcrs by averaging Sbest over the entire image.

    Parameters
    ----------
    image : array_like or list/tuple of array_like
        A single image or a list/tuple of images (for multi-spectral case).
    voxelspacing : sequence of floats
        The side-length of each voxel. Defaults to 1 along every dimension.
    mask : array_like
        A binary mask for the image or a slice object (or a sequence of slices).
        Anything that is not a slice / sequence of slices is interpreted as a
        boolean mask, so 0/1 integer masks select voxels instead of being used
        as fancy indices.

    Returns
    -------
    coarseness : float
        The size of coarseness of the given texture. It is basically the size of
        repeating elements in the image. ``None`` is returned (after printing a
        message) when ``voxelspacing`` does not have one entry per image dimension.
    """
    # Step1:  At each pixel (x,y), compute six averages for the windows
    # of size 2**k x 2**k, k=0,1,...,5, around the pixel.
    image = numpy.asarray(image, dtype=numpy.float32)

    # set default mask or apply given mask
    if not isinstance(mask, slice):
        if isinstance(mask[0], slice):
            # numpy only accepts a *tuple* of slices as a multi-axis index
            mask = tuple(mask)
        else:
            mask = numpy.asarray(mask, dtype=bool)
    image = image[mask]

    # set default voxel spacing if not supplied
    if voxelspacing is None:
        voxelspacing = tuple([1.] * image.ndim)

    if len(voxelspacing) != image.ndim:
        print("Voxel spacing and image dimensions do not fit.")
        return None

    # set padding for image border control (pad widths must be integers)
    padSize = tuple((int(numpy.rint((2**5.0) * voxelspacing[jj])), 0) for jj in range(image.ndim))
    Apad = numpy.pad(image, pad_width=padSize, mode='reflect')

    # Allocate memory: one difference map per window size k and per direction d
    E = numpy.empty((6, image.ndim) + image.shape)

    # prepare some slicer
    rawSlicer = [slice(None)] * image.ndim
    slicerForImageInPad = [slice(padSize[d][0], None) for d in range(image.ndim)]

    for k in range(6):
        size_vs = tuple(int(numpy.rint((2**k) * voxelspacing[jj])) for jj in range(image.ndim))
        A = uniform_filter(Apad, size = size_vs, mode = 'mirror')

        # Step2: At each pixel, compute absolute differences E(x,y) between
        # the pairs of non overlapping averages in the horizontal and vertical directions.
        for d in range(image.ndim):
            borders = int(numpy.rint((2**k) * voxelspacing[d]))

            # keep `borders` extra leading samples along d so that the two
            # shifted views below both have the extent of the original image
            slicerPad_k_d = slicerForImageInPad[:]
            slicerPad_k_d[d] = slice((padSize[d][0] - borders if borders < padSize[d][0] else 0), None)
            A_k_d = A[tuple(slicerPad_k_d)]

            AslicerL = rawSlicer[:]
            AslicerL[d] = slice(0, -borders)

            AslicerR = rawSlicer[:]
            AslicerR[d] = slice(borders, None)

            E[k, d, ...] = numpy.abs(A_k_d[tuple(AslicerL)] - A_k_d[tuple(AslicerR)])

    # step3: At each pixel, find the value of k that maximises the difference Ek(x,y)
    # in either direction and set the best size Sbest(x,y)=2**k
    k_max = E.max(1).argmax(0)
    dim = E.argmax(1)
    dim_vox_space = numpy.asarray(
        [voxelspacing[dim[k_max.flat[i]].flat[i]] for i in range(k_max.size)]
    ).reshape(k_max.shape)
    S = (2**k_max) * dim_vox_space

    # step4: Compute the coarseness feature Fcrs by averaging Sbest(x,y) over the entire image.
    return S.mean()
def pad_nd_img(image,
               new_shape=None,
               mode="edge",
               kwargs=None,
               return_slicer=False,
               shape_must_be_divisible_by=None):
    """Pad the trailing axes of ``image`` symmetrically up to a target shape.

    The target for the last ``len(new_shape)`` axes is ``new_shape``, never
    smaller than the current extent. If ``shape_must_be_divisible_by`` is
    given, the target is additionally rounded up to the next multiple of the
    divisor on each axis. When ``new_shape`` is omitted, the divisor sequence
    decides how many trailing axes are padded. Odd amounts of padding put the
    extra element at the end of the axis.

    :param image: array to pad.
    :param new_shape: minimum shape of the trailing axes (optional).
    :param mode: ``np.pad`` mode.
    :param kwargs: extra keyword arguments forwarded to ``np.pad``.
    :param return_slicer: also return a list of slices locating the original
        data inside the padded result.
    :param shape_must_be_divisible_by: scalar or per-axis divisors (optional;
        must be a sequence when ``new_shape`` is omitted).
    :returns: the padded array, or ``(padded, slicer)``.
    """
    pad_kwargs = {} if kwargs is None else kwargs

    if new_shape is None:
        assert shape_must_be_divisible_by is not None
        assert isinstance(shape_must_be_divisible_by,
                          (list, tuple, np.ndarray))
        new_shape = image.shape[-len(shape_must_be_divisible_by):]
        old_shape = new_shape
    else:
        old_shape = np.array(image.shape[-len(new_shape):])

    n_axes = len(new_shape)
    n_leading = len(image.shape) - n_axes

    # The target may only grow relative to the current extent.
    target = np.array(
        [max(new_shape[ax], old_shape[ax]) for ax in range(n_axes)])

    if shape_must_be_divisible_by is not None:
        divisors = shape_must_be_divisible_by
        if isinstance(divisors, (list, tuple, np.ndarray)):
            assert len(divisors) == len(target)
        else:
            divisors = [divisors] * len(target)

        # Step back one multiple where already divisible, so that the
        # round-up below lands on the same value again.
        for ax in range(len(target)):
            if target[ax] % divisors[ax] == 0:
                target[ax] -= divisors[ax]

        target = np.array([
            target[ax] + divisors[ax] - target[ax] % divisors[ax]
            for ax in range(len(target))
        ])

    missing = target - old_shape
    before = missing // 2
    after = missing // 2 + missing % 2
    pad_list = [[0, 0] for _ in range(n_leading)]
    pad_list += [[lo, hi] for lo, hi in zip(before, after)]

    res = np.pad(image, pad_list, mode, **pad_kwargs)
    if not return_slicer:
        return res

    # Turn (pad_before, pad_after) into (start, stop) of the original data.
    bounds = np.array(pad_list)
    bounds[:, 1] = np.array(res.shape) - bounds[:, 1]
    slicer = [slice(*pair) for pair in bounds]
    return res, slicer
def _get_compiled_theano_functions(N_QUAD_PTS):
    """Build a symbolic Hamiltonian and compile the Theano functions derived from it.

    The symbolic graph is expressed in terms of a 7-component vector
    ``dyvars`` (unpacked as ``s1, s2, phi, I1, I2, Phi, dRtilde``), two
    double scalars ``m1, m2`` and two integer scalars ``j, k``. Every
    compiled function takes the inputs ``[dyvars, m1, m2, j, k]``. The
    averaging angle ``psi`` and the quadrature weights are not inputs: they
    are substituted through ``givens`` with Gauss-Legendre nodes/weights
    rescaled so that the weighted sum is the mean over ``psi`` in [-pi, pi].

    NOTE(review): the variable names suggest a two-planet mean-motion
    resonance model around a unit-mass star -- confirm against the caller.

    Parameters
    ----------
    N_QUAD_PTS : int
        Number of Gauss-Legendre quadrature nodes used for the average
        over ``psi``.

    Returns
    -------
    dict
        Compiled Theano functions keyed by 'orbital_elements', 'actions',
        'Rtilde', 'Hamiltonian', 'Hpert', 'Hpert_components',
        'Hamiltonian_flow' and 'Hamiltonian_flow_jacobian'.
    """
    # Planet masses: m1,m2
    m1, m2 = T.dscalars(2)
    # Stellar mass is fixed to 1
    mstar = 1
    mu1 = m1 * mstar / (mstar + m1)
    mu2 = m2 * mstar / (mstar + m2)
    eta1 = mstar + m1
    eta2 = mstar + m2
    beta1 = mu1 * T.sqrt(eta1 / mstar) / (mu1 + mu2)
    beta2 = mu2 * T.sqrt(eta2 / mstar) / (mu1 + mu2)
    # Integer scalars; s is derived from them
    j, k = T.lscalars('jk')
    s = (j - k) / k
    # Angle variable for averaging over
    psi = T.dvector('psi')
    # Quadrature weights
    quad_weights = T.dvector('w')
    # Dynamical variables:
    Ndof = 3
    Nconst = 1
    dyvars = T.vector()
    # dyvars holds 2*Ndof + Nconst = 7 entries, unpacked in this fixed order
    s1, s2, phi, I1, I2, Phi, dRtilde = [
        dyvars[i] for i in range(2 * Ndof + Nconst)
    ]
    a20 = T.constant(1.)
    # NOTE(review): the exponents 2 / 3 and 1 / 3 rely on true division;
    # under Python 2 without `from __future__ import division` they are 0.
    a10 = ((j - k) / j)**(2 / 3) * (eta1 / eta2)**(1 / 3) * a20
    L10 = beta1 * T.sqrt(a10)
    L20 = beta2 * T.sqrt(a20)
    Psi = s * L20 + (1 + s) * L10
    Rtilde = dRtilde - L10 - L20
    ####
    # angles
    ####
    # rtilde is a constant zero, so Omega is zero as well
    rtilde = T.constant(0.)
    Omega = -1 * rtilde
    l1 = phi + k * (1 + s) * psi + Omega
    l2 = phi + k * s * psi + Omega
    gamma1 = s1 - phi - Omega
    gamma2 = s2 - phi - Omega
    q1 = 0.5 * np.pi - Omega
    q2 = -0.5 * np.pi - Omega
    pomega1 = -1 * gamma1
    pomega2 = -1 * gamma2
    Omega1 = -1 * q1
    Omega2 = -1 * q2
    omega1 = pomega1 - Omega1
    omega2 = pomega2 - Omega2
    ###
    # actions
    ###
    Gamma1 = I1
    Gamma2 = I2
    L1 = Psi / k - s * (I1 + I2) - s * Phi
    L2 = -1 * Psi / k + (1 + s) * (I1 + I2) + (1 + s) * Phi
    Cz = -1 * Rtilde
    R = L1 + L2 - Gamma1 - Gamma2 - Cz
    G1 = L1 - Gamma1
    G2 = L2 - Gamma2
    # With R as defined above, the denominator below reduces to Cz
    r2_by_r1 = (L2 - L1 - Gamma2 + Gamma1) / (L1 + L2 - Gamma1 - Gamma2 - R)
    rho1 = 0.5 * R * (1 + r2_by_r1)
    rho2 = 0.5 * R * (1 - r2_by_r1)
    a1 = (L1 / beta1)**2
    e1 = T.sqrt(1 - (1 - (Gamma1 / L1))**2)
    a2 = (L2 / beta2)**2
    e2 = T.sqrt(1 - (1 - (Gamma2 / L2))**2)
    cos_inc1 = 1 - rho1 / G1
    cos_inc2 = 1 - rho2 / G2
    inc1 = T.arccos(cos_inc1)
    inc2 = T.arccos(cos_inc2)
    Hkep = -0.5 * T.sqrt(eta1) * beta1 / a1 - 0.5 * T.sqrt(eta2) * beta2 / a2
    ko = KeplerOp()
    M1 = l1 - pomega1
    M2 = l2 - pomega2
    # Adding zeros_like(M) gives e the same shape as M (one value per psi node)
    # presumably KeplerOp maps (mean anomaly, eccentricity) to (sin f, cos f) -- verify
    sinf1, cosf1 = ko(M1, e1 + T.zeros_like(M1))
    sinf2, cosf2 = ko(M2, e2 + T.zeros_like(M2))
    #
    n1 = T.sqrt(eta1 / mstar) * a1**(-3 / 2)
    n2 = T.sqrt(eta2 / mstar) * a2**(-3 / 2)
    # Only Hint_dir and Hint_ind are used below; r1, r2, v1, v2 are discarded
    Hint_dir, Hint_ind, r1, r2, v1, v2 = calc_Hint_components_sinf_cosf(
        a1, a2, e1, e2, inc1, inc2, omega1, omega2, Omega1, Omega2, n1, n2,
        sinf1, cosf1, sinf2, cosf2)
    eps = m1 * m2 / (mu1 + mu2) / T.sqrt(mstar)
    Hpert = (Hint_dir + Hint_ind / mstar)
    # Weighted sum over the psi nodes
    Hpert_av = Hpert.dot(quad_weights)
    Htot = Hkep + eps * Hpert_av
    #####################################################
    # Set parameters for compiling functions with Theano
    #####################################################
    # Get numerical quadrature nodes and weights
    nodes, weights = np.polynomial.legendre.leggauss(N_QUAD_PTS)
    # Rescale for integration interval from [-1,1] to [-pi,pi]
    # (weights * 0.5 makes the weighted sum a mean rather than an integral)
    nodes = nodes * np.pi
    weights = weights * 0.5
    # 'givens' will fix some parameters of Theano functions compiled below
    givens = [(psi, nodes), (quad_weights, weights)]
    # 'ins' will set the inputs of Theano functions compiled below
    #   Note: 'extra_ins' will be passed as values of object attributes
    #   of the 'ResonanceEquations' class defined below
    extra_ins = [m1, m2, j, k]
    ins = [dyvars] + extra_ins
    # NOTE(review): 'orbels' has 10 entries but only 9 names are zipped with
    # it, so the trailing Omega never reaches orbels_dict -- confirm intended.
    orbels = [a1, e1, inc1, k * s1, a2, e2, inc2, k * s2, phi, Omega]
    orbels_dict = dict(
        zip([
            'a1', 'e1', 'inc1', 'theta1', 'a2', 'e2', 'inc2', 'theta2', 'phi'
        ], orbels))
    actions = [L1, L2, Gamma1, Gamma2, rho1, rho2]
    actions_dict = dict(
        zip(['L1', 'L2', 'Gamma1', 'Gamma2', 'Q1', 'Q2'], actions))
    # Conservative flow
    gradHtot = T.grad(Htot, wrt=dyvars)
    hessHtot = theano.gradient.hessian(Htot, wrt=dyvars)
    # Pad the matrix with Nconst trailing zero rows/columns; presumably this
    # matches its size to len(dyvars) -- verify what _get_Omega_matrix returns
    Jtens = T.as_tensor(
        np.pad(_get_Omega_matrix(Ndof), (0, Nconst), 'constant'))
    H_flow_vec = Jtens.dot(gradHtot)
    H_flow_jac = Jtens.dot(hessHtot)
    ##########################
    # Compile Theano functions
    ##########################
    # on_unused_input='ignore' lets every function share the same input list
    # even when its output does not depend on all of the inputs
    orbels_fn = theano.function(inputs=ins,
                                outputs=orbels_dict,
                                givens=givens,
                                on_unused_input='ignore')
    actions_fn = theano.function(inputs=ins,
                                 outputs=actions_dict,
                                 givens=givens,
                                 on_unused_input='ignore')
    Rtilde_fn = theano.function(inputs=ins,
                                outputs=Rtilde,
                                givens=givens,
                                on_unused_input='ignore')
    Htot_fn = theano.function(inputs=ins,
                              outputs=Htot,
                              givens=givens,
                              on_unused_input='ignore')
    Hpert_fn = theano.function(inputs=ins,
                               outputs=Hpert_av,
                               givens=givens,
                               on_unused_input='ignore')
    # Averaged direct and indirect parts, returned separately
    Hpert_components_fn = theano.function(
        inputs=ins,
        outputs=[Hint_dir.dot(quad_weights),
                 Hint_ind.dot(quad_weights)],
        givens=givens,
        on_unused_input='ignore')
    H_flow_vec_fn = theano.function(inputs=ins,
                                    outputs=H_flow_vec,
                                    givens=givens,
                                    on_unused_input='ignore')
    H_flow_jac_fn = theano.function(inputs=ins,
                                    outputs=H_flow_jac,
                                    givens=givens,
                                    on_unused_input='ignore')
    return dict({
        'orbital_elements': orbels_fn,
        'actions': actions_fn,
        'Rtilde': Rtilde_fn,
        'Hamiltonian': Htot_fn,
        'Hpert': Hpert_fn,
        'Hpert_components': Hpert_components_fn,
        'Hamiltonian_flow': H_flow_vec_fn,
        'Hamiltonian_flow_jacobian': H_flow_jac_fn
    })
def compute_pyramid(net, psize, step, interval, image, mean_pixel=None):
    """Compute a pyramid of CNN-derived features for the given image. Similar
    to ``impyra.m``, except we haven't bothered upscaling, since Chen & Yuille
    don't upscale anyway.

    :param net: a ``caffe.Net`` instance corresponding to the fully
        convolutional "deploy" model.
    :param psize: parameter from Chen & Yuille. It's actually ``step *
        tsize``, where ``tsize`` is a kind of "natural" template size computed
        from the dimensions of skeletons in the training set. Unlike Chen &
        Yuille, we use **ROW MAJOR ORDER** for ``psize``!
    :param step: yet another parameter from Chen & Yuille. I actually have no
        idea what this corresponds to, intuitively.
    :param interval: how many pyramid levels does it take to halve the data
        size? Also used as the number of levels pushed through the network
        in one forward pass.
    :param image: ``h * w * c`` ``ndarray`` representing a single input
        image.
    :param mean_pixel: optional mean pixel argument; it is reversed before
        being subtracted. Defaults to 128 for every channel.
    :returns: list of dictionaries with ``output_size``,
        ``{width,height}_pad``, ``scale`` and ``features`` keys. Each entry in
        the list corresponds to a level of the feature pyramid (largest scale
        first). The ``features`` key is an "image" representing the fully
        convolutional network output, where the number of channels in the
        image is equal to the number of softmax outputs in the CNN."""
    assert image.ndim == 3 and image.shape[2] == 3
    # Accept any 2-element sequence for psize, not only ndarrays
    psize = np.asarray(psize)
    if mean_pixel is None:
        mean_pixel = 128 * np.ones((3,))
    else:
        # Flip the mean pixel to BGR
        mean_pixel = mean_pixel[::-1]
    height_pad, width_pad = np.maximum(np.ceil((psize - 1) / 2.0), 0)\
        .astype('int')
    scale = 2 ** (1.0 / interval)
    image_size = np.array(image.shape[:2])
    max_scale = int(1 + np.floor(np.log(np.min(image_size)) / np.log(scale)))
    # This will have keys 'output_size', 'scale', 'height_pad', 'width_pad',
    # 'features'
    rv = [{} for _ in range(max_scale)]

    # A natural size, I guess
    max_batch_size = interval
    for batch_level in range(0, max_scale, max_batch_size):
        # Builtin min: np.min(a, b) would treat b as the *axis* argument
        batch_size = min(max_batch_size, max_scale - batch_level)
        batch_end = batch_level + batch_size
        base_dims = image_size / scale ** (batch_level)
        scaled = cf.io.resize(image, base_dims.astype('int'))

        # This next array will be passed to Caffe. It is sized for the first
        # (largest) level of the batch: rows get the height padding and
        # columns get the width padding, matching ``padded`` below.
        caffe_input = np.zeros((
            batch_size,
            3,
            scaled.shape[0] + 2 * height_pad,
            scaled.shape[1] + 2 * width_pad,
        ))

        for sublevel in range(batch_level, batch_end):
            # Pad and add to Caffe input
            pad_dims = (2 * (height_pad,), 2 * (width_pad,), 2 * (0,))
            padded = np.pad(scaled, pad_dims, mode='edge') - mean_pixel
            max_row, max_col = padded.shape[:2]
            caffe_input[sublevel - batch_level, :, :max_row, :max_col] = \
                padded.transpose((2, 0, 1))

            # Store metadata
            info = rv[sublevel]
            info['output_size'] = np.floor(
                (padded.shape[:2] - psize) / float(step)
            ).astype('int') + 1
            # NOTE(review): the ``sublevel - 1`` exponent looks like a
            # leftover from 1-based indexing -- confirm before changing.
            info['scale'] = step * scale ** (sublevel - 1)
            info['width_pad'] = width_pad / float(step)
            info['height_pad'] = height_pad / float(step)

            # Resize for the next step. Skipped after the last level of the
            # batch: the result would be unused (the outer loop recomputes
            # it) and the target size may already have shrunk to zero.
            if sublevel + 1 < batch_end:
                base_dims /= scale
                scaled = cf.io.resize(image, base_dims.astype('int'))

        # To do a fully convolutional forward pass, we just reshape the data
        # layer and let the rest follow
        net.blobs['data'].reshape(*caffe_input.shape)
        net.blobs['data'].data[...] = caffe_input
        # TODO: What does result contain? Apparently it's a dictionary mapping
        # blob names to ndarrays for those blobs. In this case, I guess we'll
        # have a batch_size * softmax_outputs * something * something_else
        # ndarray, where something and something_else will be decided by
        # some annoying arithmetic on strides, pads and steps. Ugh, gross.
        result = net.forward()['prob']

        for sublevel in range(batch_level, batch_end):
            info = rv[sublevel]
            max_row, max_col = info['output_size']
            # Crop each level's output to its own size, channels last
            info['features'] = result[
                sublevel - batch_level, :, :max_row, :max_col
            ].transpose((1, 2, 0))

    return rv
def MakeNdarray(tensor):
    """Convert a TensorProto into a numpy ndarray of the same shape and data.

    For example:

    ```python
    # Tensor a has shape (2,3)
    a = tf.constant([[1,2,3],[4,5,6]])
    proto_tensor = tf.make_tensor_proto(a)  # convert `tensor a` to a proto tensor
    tf.make_ndarray(proto_tensor) # output: array([[1, 2, 3],
    #                                              [4, 5, 6]], dtype=int32)
    # output has shape (2,3)
    ```

    If the proto carries fewer values than the shape requires, the last value
    is repeated to fill the array; if a typed value field is empty, zeros are
    returned.

    Args:
      tensor: A TensorProto.

    Returns:
      A numpy array with the tensor contents.

    Raises:
      TypeError: if tensor has unsupported type.
    """
    shape = [dim.size for dim in tensor.tensor_shape.dim]
    num_elements = np.prod(shape, dtype=np.int64)
    tensor_dtype = dtypes.as_dtype(tensor.dtype)
    dtype = tensor_dtype.as_numpy_dtype

    # Packed binary content takes precedence over the typed value fields.
    if tensor.tensor_content:
        packed = np.frombuffer(tensor.tensor_content, dtype=dtype)
        return packed.copy().reshape(shape)

    if tensor_dtype == dtypes.string:
        # np.pad throws on these arrays of type np.object, so fill by hand.
        strings = list(tensor.string_val)
        missing = num_elements - len(strings)
        if missing > 0:
            filler = strings[-1] if strings else ""
            strings.extend([filler] * missing)
        return np.array(strings, dtype=dtype).reshape(shape)

    def _pairs_to_complex(flat):
        # Consecutive (real, imag) entries are combined into complex numbers.
        stream = iter(flat)
        return np.array([complex(re, im) for re, im in zip(stream, stream)],
                        dtype=dtype)

    if tensor_dtype == dtypes.float16 or tensor_dtype == dtypes.bfloat16:
        # half_val stores the raw 16-bit pattern; reinterpret it in place
        # as the proper half-precision type.
        values = np.fromiter(tensor.half_val, dtype=np.uint16)
        values.dtype = tensor_dtype.as_numpy_dtype
    elif tensor_dtype == dtypes.float32:
        values = np.fromiter(tensor.float_val, dtype=dtype)
    elif tensor_dtype == dtypes.float64:
        values = np.fromiter(tensor.double_val, dtype=dtype)
    elif tensor_dtype in (
        dtypes.int32, dtypes.uint8, dtypes.uint16, dtypes.int16, dtypes.int8,
        dtypes.qint32, dtypes.quint8, dtypes.qint8, dtypes.qint16,
        dtypes.quint16,
    ):
        values = np.fromiter(tensor.int_val, dtype=dtype)
    elif tensor_dtype == dtypes.int64:
        values = np.fromiter(tensor.int64_val, dtype=dtype)
    elif tensor_dtype == dtypes.complex64:
        values = _pairs_to_complex(tensor.scomplex_val)
    elif tensor_dtype == dtypes.complex128:
        values = _pairs_to_complex(tensor.dcomplex_val)
    elif tensor_dtype == dtypes.bool:
        values = np.fromiter(tensor.bool_val, dtype=dtype)
    else:
        raise TypeError("Unsupported tensor type: %s" % tensor.dtype)

    if values.size == 0:
        return np.zeros(shape, dtype)

    # Fewer values than elements: repeat the last one until the shape is full.
    if values.size != num_elements:
        values = np.pad(values, (0, num_elements - values.size), "edge")

    return values.reshape(shape)
# Load the input picture from disk
image = cv2.imread('test_image.jpg')

# Optional down-scaling step, disabled by default
#(row_num_im, col_num_im, chan) = image.shape
#image = cv2.resize(image,(int(col_num_im/2), int(row_num_im/2)))

# Work on a float64 grayscale copy
A = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY).astype('float64')

# Output buffer with the same extent as the grayscale image
row_num, col_num = A.shape
B = np.zeros((row_num, col_num))

# One-pixel zero border so every pixel has a full 3x3 neighbourhood
A_padding = np.pad(A, pad_width=1, mode='constant', constant_values=0)

# Sobel kernels
filt_size = 3
Sx = np.array([[-1, 0, 1],
               [-2, 0, 2],
               [-1, 0, 1]])
Sy = np.array([[1, 2, 1],
               [0, 0, 0],
               [-1, -2, -1]])

for i, j in np.ndindex(row_num, col_num):
    # Output pixel (i, j) sits at (i + 1, j + 1) in the padded image, so
    # its 3x3 window starts at (i, j) there.
    A_block = A_padding[i:i + 3, j:j + 3]
    B[i, j] = (A_block * Sx).sum() + (A_block * Sy).sum()

B_min = B.min()
def pad_x_single(x_single, max_length):
    """Extend ``x_single`` along its first axis to ``max_length`` entries.

    Padding is appended after the existing data and filled with
    ``pad_value``, which is read from the enclosing scope; all other axes
    are left untouched. ``x_single`` must not be longer than ``max_length``.
    """
    current_length = x_single.shape[0]
    assert current_length <= max_length

    # Pad only at the end of axis 0; every remaining axis gets (0, 0).
    pad_width = tuple(
        [(0, max_length - current_length)]
        + [(0, 0) for _ in range(x_single.ndim - 1)]
    )
    return np.pad(x_single, pad_width,
                  mode='constant', constant_values=pad_value)