def decode(self, src_sentence):
    """Decodes a single source sentence using beam search."""
    self.count = 0
    self.time = 0
    self.initialize_predictor(src_sentence)
    hypos = self._get_initial_hypos()
    it = 1
    if self.reward:
        self.l = len(src_sentence)
    while not self.stop_criterion(
            utils.flattened(hypos)) and it < self.max_len:
        it += 1
        next_hypos = []
        for i, group in enumerate(hypos):
            next_group = []
            for hypo in group:
                # Hypotheses that already ended in EOS are carried over
                # unchanged; all others are expanded by one token.
                if hypo.get_last_word() == utils.EOS_ID:
                    next_group.append(hypo)
                    continue
                for next_hypo in self._expand_hypo(hypo):
                    next_group.append(next_hypo)
            # Passing the groups selected so far lets _get_next_hypos
            # penalize overlap with them (diverse group selection).
            next_hypos.append(self._get_next_hypos(next_group,
                                                   self.group_sizes[i],
                                                   next_hypos))
        hypos = next_hypos
    return self.get_full_hypos_sorted(utils.flattened(hypos))
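# A minimal sketch of the utils.flattened helper used above, assuming it
# just concatenates the hypothesis groups into one flat list so that
# stop_criterion and get_full_hypos_sorted can look at every group at once;
# the actual helper in utils may differ.
def flattened(list_of_groups):
    return [item for group in list_of_groups for item in group]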
import os

def find_files_to_process():
    files_from_crawler = list(flattened(recursive_listdir(DOWNLOAD_DIR)))
    files_to_process = []
    files_to_ignore = []
    for path in files_from_crawler:
        try:
            import_date = find_date(path)
            size = os.path.getsize(path)
            files_to_process.append((path, import_date, size))
        except ValueError:
            files_to_ignore.append(path)

    def _import_date((_1, import_date, _2)):
        return import_date

    def _size((_1, _2, size)):
        return size

    # Sort by import date, then pair each file with the running byte total
    # so callers can report progress against bytes_to_process.
    bytes_accumulator = Accumulator()
    files_to_process.sort(key=_import_date)
    files_to_process = [(f, bytes_accumulator(_size(f)))
                        for f in files_to_process]
    bytes_to_process = bytes_accumulator.getvalue()
    return (bytes_to_process, files_to_process, files_to_ignore)
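# Accumulator is not defined in this snippet. A minimal sketch consistent
# with its use above: each call adds a file's size and returns the running
# total, and getvalue() reports the final byte count.
class Accumulator(object):
    def __init__(self, initial=0):
        self.total = initial

    def __call__(self, value):
        self.total += value
        return self.total

    def getvalue(self):
        return self.total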
def _get_next_hypos(self, all_hypos, size, other_groups=None):
    """Get hypos for the next iteration."""
    all_scores = np.array([self.get_adjusted_score(hypo)
                           for hypo in all_hypos])
    if other_groups:
        # Add a Hamming-distance diversity term against hypotheses already
        # selected by other groups, weighted by lmbda.
        all_scores = all_scores + self.lmbda * self.hamming_distance_penalty(
            all_hypos, utils.flattened(other_groups))
    inds = utils.argmax_n(all_scores, size)
    return [all_hypos[ind] for ind in inds]
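# utils.argmax_n is assumed to return the indices of the n highest-scoring
# hypotheses. A sketch using numpy's argpartition, which selects the top n
# in linear time and then orders just those n by descending score:
import numpy as np

def argmax_n(arr, n):
    arr = np.asarray(arr)
    top = np.argpartition(arr, -n)[-n:]
    return top[np.argsort(-arr[top])]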
import os
import re

def filename_has_date(filename):
    try:
        import_date = find_date(filename)
        return True
    except (ValueError, ImportError), err:
        return False

re_agency = re.compile('^[0-9]*[A-Z]+')

def extract_prefix(filename):
    prefix_match = re_agency.match(filename.upper())
    if prefix_match is not None:
        prefix = prefix_match.group()
        return fix_prefix(prefix)
    else:
        return None

files_to_process = filter(filename_has_date,
                          map(os.path.basename,
                              flattened(recursive_listdir(DOWNLOAD_DIR))))
prefixes = map(extract_prefix, files_to_process)

def unique(iterable):
    def combine(accum, item):
        accum[item] = None
        return accum
    return reduce(combine, iterable, {}).keys()

def frequency(iterable):
    def combine(frequencies, item):
        cnt = frequencies.get(item, 0)
        frequencies[item] = cnt + 1
        return frequencies
    return reduce(combine, iterable, {})
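# A short usage sketch (hypothetical prefix values) for the reduce-based
# helpers above: unique collapses duplicates through dict keys (key order
# is not preserved), and frequency builds a histogram of item counts.
print sorted(unique(['07HUD', '07HUD', '12DOJ']))   # ['07HUD', '12DOJ']
print frequency(['07HUD', '07HUD', '12DOJ'])        # {'07HUD': 2, '12DOJ': 1}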