def _cut_all_cols(self, row_ranges):
    """
    cut along columns of all image strips
    :return: array of _cut_cols results
    """
    if sys.platform == 'darwin':
        # parallel running on HPC has errors, so only parallelize on macOS
        eprint('parallel running on macOS ...')
        return PP().map(lambda (u, l): self._cut_cols(u, l), row_ranges)
    else:
        eprint('no parallel running ...')
        return map(lambda (u, l): self._cut_cols(u, l), row_ranges)
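# A Python 3-compatible sketch of the dispatch above: the `lambda (u, l):`
# tuple-unpacking syntax used throughout this file is Python 2 only (removed
# by PEP 3113). `_cut_all_cols_py3` is a hypothetical name for illustration;
# it assumes PP() exposes a multiprocessing-style .map.
def _cut_all_cols_py3(self, row_ranges):
    if sys.platform == 'darwin':
        eprint('parallel running on macOS ...')
        return PP().map(lambda r: self._cut_cols(r[0], r[1]), row_ranges)
    eprint('no parallel running ...')
    return [self._cut_cols(u, l) for u, l in row_ranges]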
def _cut_cols(self, upper, lower):
    """
    cut along columns of an image between upper and lower
    :return: cutting positions (0-based column indices)
    """
    col_pixels = self.page.cnt_col_pixels(upper, lower)
    marker = '{}-col-{}-{}'.format(self.page.name, upper, lower)
    eprint('Run carmel on {}'.format(marker))
    # shift Carmel's positions down by one to index columns from zero
    cutting_idx = map(
        lambda idx: idx - 1,
        Cutter._cutting_points(col_pixels, self.carmel_args.n_col,
                               self.carmel_args.c_std1,
                               self.carmel_args.c_std2,
                               self.carmel_args.c_penalty, marker))
    return cutting_idx
def _cut_rows(self):
    """
    cut along rows of an image
    :return: cutting positions (0-based row indices)
    """
    row_pixels = self.page.cnt_row_pixels()
    marker = '{}-row'.format(self.page.name)
    eprint('Run carmel on {}'.format(marker))
    # shift Carmel's positions down by one to index rows from zero
    cutting_idx = map(
        lambda idx: idx - 1,
        Cutter._cutting_points(row_pixels, self.carmel_args.n_row,
                               self.carmel_args.r_std1,
                               self.carmel_args.r_std2,
                               self.carmel_args.r_penalty, marker))
    return cutting_idx
def _cut_all_cols_with_samples(self):
    """
    cut along columns of all image strips
    :return: array of _cut_cols_with_samples results
    """
    if sys.platform == 'darwin':
        # parallel running on HPC has errors; only parallelize on macOS
        # (mirrors the dispatch in _cut_all_cols)
        eprint('parallel running on macOS ...')
        return PP().map(
            lambda (i, (u, l)): self._cut_cols_with_samples(u, l, i),
            self.row_ranges_w_index)
    else:
        eprint('no parallel running ...')
        return map(
            lambda (i, (u, l)): self._cut_cols_with_samples(u, l, i),
            self.row_ranges_w_index)
def _cut_cols_with_samples(self, upper, lower, idx):
    """
    cut along columns of an image between upper and lower,
    using sampled boundary counts as the pixel profile
    :return: cutting positions (0-based column indices)
    """
    col_pixels = list(self.samples_of_boundaries[idx])
    marker = '{}-col-{}-{}'.format(self.page.name, upper, lower)
    eprint('Run carmel on {}'.format(marker))
    # the lambda variable is named `i` so it does not shadow the `idx` argument
    cutting_idx = map(
        lambda i: i - 1,
        CutterSampling._cutting_points(col_pixels, self.carmel_args.n_col,
                                       self.carmel_args.c_std1,
                                       self.carmel_args.c_std2,
                                       self.carmel_args.c_penalty, marker))
    return cutting_idx
def pixels_to_vectors(pixel_row, snn, wb, step=10, window=10):
    h, w = pixel_row.shape
    # pad with white pixels (255) so the width is a multiple of `step`
    if w % step != 0:
        padding = np.full((h, step - (w % step)), 255, dtype='uint8')
        pixel_row = np.concatenate([pixel_row, padding], axis=1)
        h, w = pixel_row.shape
    eprint('h, w of pixel row is {}, {}'.format(h, w))
    num_frames = w / step  # integer division under Python 2
    # split the row into `num_frames` frames of `step` columns each
    frames = pixel_row.reshape((h, -1, step))
    all_features = []
    for i in range(num_frames):
        # encode a sliding window of up to `window` frames ending at frame i
        features = compute_snn_features(
            frames[:, max(0, i - window + 1):i + 1, :], snn, wb)
        all_features.append(features)
    return all_features
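# Framing sketch (illustration only): this helper repeats just the padding
# and reshaping logic of pixels_to_vectors, without the SNN encoding, so the
# frame layout can be checked in isolation. `frame_row` is a hypothetical
# name, not part of the pipeline.
def frame_row(pixel_row, step=10):
    h, w = pixel_row.shape
    if w % step != 0:
        pad = np.full((h, step - (w % step)), 255, dtype='uint8')
        pixel_row = np.concatenate([pixel_row, pad], axis=1)
    # shape (h, num_frames, step): frame i is pixel_row[:, i*step:(i+1)*step]
    return pixel_row.reshape((h, -1, step))

# e.g. a 40x95 white strip is padded to width 100, giving 10 frames:
#     frame_row(np.full((40, 95), 255, dtype='uint8')).shape == (40, 10, 10)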
@classmethod
def _cutting_points(cls, black_pixels, mean, std1, std2, p,
                    marker='no_marker'):
    """
    Use Carmel to compute cutting positions.
    align3.sh, make-fsa2.sh, and make-fst2.sh are required.
    :param black_pixels: number of pixels being cut
    :param mean: the mean of total characters in a row, or total rows in a page
    :param std1: stddev over number of characters or number of rows
    :param std2: stddev over character size or row size
    :param p: penalty of cutting black pixels
    :param marker: marker to name Carmel running fsts
    :return: cutting positions
    """
    pwd = path.dirname(path.realpath(__file__))
    exe = '{}/align3.sh'.format(pwd)
    # write the pixel counts as a single space-separated line of
    # 'c<count>' tokens, preceded by a blank line
    f = NamedTemporaryFile('w', prefix='carmel_input_{}_'.format(marker),
                           delete=False)
    f.write('\n')
    f.write(' '.join(map(lambda c: 'c' + str(c), black_pixels)))
    f.close()
    cmd = '{} {} {} {} {} {}'.format(exe, f.name, mean, std1, std2, p)
    pipe = Popen(cmd, shell=True, stdout=PIPE, stderr=PIPE)
    out, err = pipe.communicate()
    ret = pipe.wait()
    eprint('marker: {}, returns: {}'.format(marker, out))
    eprint('marker: {}, return code is {}'.format(marker, ret))
    assert out != "", "marker: {}, no output received".format(marker)
    return map(lambda e: int(e), out.split())
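# Usage sketch (values are made-up): a call like the one below writes a temp
# file holding a blank line followed by 'c3 c0 c7 c0 c2', runs align3.sh on
# it, and parses whitespace-separated integer positions from stdout. The
# callers above subtract one from each position, so Carmel's output appears
# to be 1-based.
#
#     positions = Cutter._cutting_points([3, 0, 7, 0, 2], mean=2,
#                                        std1=1, std2=1, p=0.5, marker='demo')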
row_ranges = Cutter.get_ranges(row_cut)
pixel_rows = []
binary_img = Page.get_binary_image(img_2d)
for upper, lower in row_ranges:
    pixel_rows.append(binary_img[upper:lower, :])
pixel_rows = np.asarray(pixel_rows)

features_ = np.load(feature_path)
all_features = features_['vectors']
init_feature_bins = features_['feature_bins']

k = 23  # 22 unique characters and one space
m = 4096
max_n_col = int(w / step + 1)
eprint('max_n_col={}'.format(max_n_col))

mean_size = 6
std_size = 3
p_gaussian = map(
    lambda x: backward_gaussian_smooth(mean_size, std_size, x + 1),
    range(step))
p_penalty = 0.9

(feature_bins, feature_zs, p_current, boundaries, col_sizes, gmm,
 zs_probs, samples_of_boundaries) = boundaries_sampler(
    pixel_rows, k, m,
    init_boundaries=col_cuts,
    init_feature_bins=init_feature_bins,
    # the original call is truncated here; the remaining keyword arguments
    # are reconstructed from the boundaries_sampler signature, with
    # `window` assumed to be defined earlier in the script
    max_n_col=max_n_col, step=step, window=window,
    snn_features=all_features, p_gaussian=p_gaussian, p_penalty=p_penalty)
def boundaries_sampler(pixel_rows, k, m, init_boundaries, init_feature_bins,
                       max_n_col, step, window, snn_features, p_gaussian,
                       p_penalty, t0=3, t1=1, n_epoch=5000, burn_in=1000,
                       collection_rate=0.05, checkpoint_dir=None,
                       checkpoint_epoch_step=10):
    """
    block-wise sample boundaries for rows of cipher images.
    cipher images are first cut into rows, then each row is cut into frames.
    frames between two boundaries are treated as one image character.
    :param pixel_rows: array of pixel rows [np.array((h, w))]
    :param k: number of total clusters
    :param m: dimension of features
    :param init_boundaries: initial boundaries to cut pixel rows
    :param init_feature_bins: initial features generated from initial boundaries
    :param max_n_col: maximum number of characters in each row
    :param step: number of pixels per frame
    :param window: maximum steps looking back for finding boundaries at each frame
    :param snn_features: pre-computed features from snn encoders, see vectorize.py
    :param p_gaussian: per-step prior computed by backward_gaussian_smooth
    :param p_penalty: penalty used during backward sampling
    :param t0: high temperature for simulated annealing
    :param t1: low temperature
    :param n_epoch: number of epochs for sampling
    :param burn_in: number of burn-in epochs before samples are collected
    :param collection_rate: probability of recording a sampled boundary
    :param checkpoint_dir: directory for checkpoints, or None to disable
    :param checkpoint_epoch_step: number of epochs between checkpoints
    :return: (feature_bins, feature_zs, p_current, boundaries, col_sizes,
              gmm, zs_probs, samples_of_boundaries)
    """
    n_row = len(pixel_rows)
    eprint('n_row={}'.format(n_row))
    feature_bins = np.zeros((n_row, max_n_col, m))
    boundaries = np.zeros((n_row, max_n_col), dtype='int64')
    col_sizes = np.zeros(n_row, dtype='int64')
    feature_zs = np.full((n_row, max_n_col), -1, dtype='int64')

    # GMM init
    cluster = GMM(features=np.concatenate(init_feature_bins, axis=0),
                  n_clusters=k, params_init="kmeans_init",
                  cov_type='spherical', scaling_factor=1.0)
    cluster.fit()
    init_feature_zs = cluster.labels()
    eprint('init_feature_zs: {}'.format(init_feature_zs))

    i = 0
    for row in range(n_row):
        col_sizes[row] = len(init_feature_bins[row])
        feature_bins[row, :col_sizes[row], :] = init_feature_bins[row]
        boundaries[row, :col_sizes[row]] = init_boundaries[row]
        feature_zs[row, :col_sizes[row]] = init_feature_zs[i:i + col_sizes[row]]
        i += col_sizes[row]

    # init gmm
    gmm, zs_probs = estimate_gmm(feature_bins, feature_zs, col_sizes, k,
                                 ignore=-1, cov_type='spherical',
                                 scaling_factor=1.0)

    # compute p(current)
    probs_of_rows = np.full(n_row, -np.inf)
    p_current = 0
    for row in range(n_row):
        gaussian_probs = gmm_assign(gmm, feature_bins[row, :col_sizes[row]])
        p_row = gaussian_probs + zs_probs[:, np.newaxis]
        probs_of_rows[row] = np.sum(p_row[feature_zs[row, :col_sizes[row]],
                                          range(col_sizes[row])])
        p_current += probs_of_rows[row]

    samples_of_boundaries = np.zeros((n_row, max_n_col))

    # sampling body
    t_step = 1.0 * (t0 - t1) / burn_in
    for epoch in range(n_epoch):
        # anneal linearly from t0 down to t1 over the burn-in period
        ti = max(t0 - epoch * t_step, t1)
        if epoch < burn_in:
            eprint('burn-in epoch {}'.format(epoch))
        else:
            eprint('sampling epoch {}'.format(epoch))
        total_elements = np.sum(col_sizes)
        eprint('epoch-log_p-temperature: {},{},{}'.format(
            epoch, p_current / total_elements, ti))
        eprint(col_sizes)
        for row in range(n_row):
            # remove row_j from GMM
            feature_bins[row, :, :] = 0
            p_current -= probs_of_rows[row]
            # evaluate GMM without the current row
            gmm, zs_probs = estimate_gmm(feature_bins, feature_zs, col_sizes,
                                         k, ignore=row, cov_type='spherical',
                                         scaling_factor=1.0)
            # re-sample character boundaries
            alpha_tbl, all_probs, n_frames = iter_forward_filtering(
                pixel_rows[row], gmm, step, window, snn_features[row],
                zs_probs)
            new_boundaries, new_features = iter_backward_sampling(
                pixel_rows[row], step, window, alpha_tbl, all_probs,
                snn_features[row], ti, p_gaussian, p_penalty)
            # update row size
            col_sizes[row] = len(new_boundaries)
            feature_bins[row, :col_sizes[row], :] = new_features
            boundaries[row, :col_sizes[row]] = new_boundaries
            # re-sample cluster assignments
            new_zs = iter_cluster_sampling(new_features, gmm, zs_probs, ti)
            feature_zs[row, :col_sizes[row]] = new_zs
            # re-count p_row
            gaussian_probs = gmm_assign(gmm, new_features)
            p_row = gaussian_probs + zs_probs[:, np.newaxis]
            probs_of_rows[row] = np.sum(p_row[feature_zs[row, :col_sizes[row]],
                                              range(col_sizes[row])])
            p_current += probs_of_rows[row]
            # after burn-in, occasionally record the sampled boundaries
            for i in range(col_sizes[row]):
                if epoch >= burn_in and np.random.random() <= collection_rate:
                    samples_of_boundaries[row, new_boundaries[i]] += 1
        if (checkpoint_dir is not None and epoch >= burn_in
                and epoch % checkpoint_epoch_step == 0):
            eprint('save checkpoint of epoch-{} to {}'.format(
                epoch, checkpoint_dir))
            np.savez('{}/checkpoint.npz'.format(checkpoint_dir),
                     epoch=epoch,
                     feature_bins=feature_bins,
                     feature_zs=feature_zs,
                     p_current=p_current,
                     boundaries=boundaries,
                     col_sizes=col_sizes,
                     gmm=gmm,
                     zs_probs=zs_probs,
                     samples_of_boundaries=samples_of_boundaries,
                     probs_of_rows=probs_of_rows)
    return (feature_bins, feature_zs, p_current, boundaries, col_sizes, gmm,
            zs_probs, samples_of_boundaries)
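# A minimal sketch (illustration only) of one way to read the collected
# samples: each row of `samples_of_boundaries` is a histogram of how often
# each frame index was chosen as a boundary, so the peaks are boundary
# candidates. `consensus_boundaries` and the `min_votes` threshold are
# made-up names and values; the pipeline above instead feeds these counts
# back into Carmel via _cut_cols_with_samples.
def consensus_boundaries(sample_counts, min_votes=5):
    # frame indices whose vote count reaches the threshold, in order
    return [i for i, c in enumerate(sample_counts) if c >= min_votes]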