def __init__(self, part, params, combiner = None):
    """Store the arguments and open the partial output file for `part`.

    part     -- partition identifier; used to build the file name and
                kept as self.part
    params   -- kept as self.params
    combiner -- optional, kept as self.combiner (None by default)

    The file name is MAP_OUTPUT formatted with this_partition() and
    `part`. Data is written to "<fname>.partial"; nothing in this
    method moves it to its final name.
    """
    self.part = part
    self.params = params
    self.combiner = combiner
    # Starts out empty; not filled by this method.
    self.comb_buffer = {}

    out_path = MAP_OUTPUT % (this_partition(), part)
    self.fname = out_path
    # NOTE(review): presumably creates the missing directories for
    # out_path -- confirm against ensure_path().
    ensure_path(out_path, False)
    partial_path = out_path + ".partial"
    self.fd = file(partial_path, "w")
def download_and_sort(self):
    """Download every reduce input to one local file and sort it.

    Each (key, value) pair read from self.inputs is appended to a
    local file as "<key> <value>" followed by a single zero byte.
    The file is first written as "<name>.partial" and renamed once
    complete. It is then sorted with the external `sort` command,
    and the sorted file is handed to self.multi_file_iterator with
    a reader that splits records back into key and value.

    Because a space separates key from value and a zero byte ends
    each record, keys must contain neither and values must not
    contain zero bytes. Violations are reported through err().

    Returns the result of self.multi_file_iterator() for the sorted
    file. A non-zero exit status from `sort` is reported via err().
    """
    dlname = REDUCE_DL % this_partition()
    ensure_path(dlname, False)
    msg("Reduce will be downloaded to %s" % dlname)

    out_fd = file(dlname + ".partial", "w")
    try:
        for fname in self.inputs:
            sze, fd = connect_input(fname)
            for k, v in fun_reduce_reader(fd, sze, fname):
                if " " in k:
                    err("Spaces are not allowed in keys "
                        "with external sort.")
                # A zero byte ends a record, so one inside a key
                # would silently split the record in two.
                if "\0" in k:
                    err("Zero bytes are not allowed in "
                        "keys with external sort.")
                if "\0" in v:
                    err("Zero bytes are not allowed in "
                        "values with external sort. "
                        "Consider using base64 encoding.")
                out_fd.write("%s %s\0" % (k, v))
    finally:
        # Release the handle even if reading or validation fails.
        out_fd.close()

    # Only a completely written download gets the final name.
    os.rename(dlname + ".partial", dlname)
    msg("Reduce input downloaded ok")

    msg("Starting external sort")
    sortname = REDUCE_SORTED % this_partition()
    ensure_path(sortname, False)
    # -z: zero-terminated records, -t " ": space-separated fields,
    # -k 1,1: compare on the key field only.
    # NOTE(review): -n compares keys numerically -- confirm this is
    # the intended ordering for non-numeric keys.
    cmd = ["sort", "-n", "-k", "1,1", "-z",
           "-t", " ", "-o", sortname, dlname]
    ret = subprocess.Popen(cmd).wait()
    if ret:
        err("Sorting %s to %s failed (%d)" %
            (dlname, sortname, ret))
    msg("External sort done: %s" % sortname)

    def sorted_reader(fd, sze, fname):
        # Splits "<key> <value>" records that end in a zero byte.
        return re_reader("(?s)(.*?) (.*?)\000", fd, sze, fname)

    return self.multi_file_iterator([sortname], reader = sorted_reader)
def __init__(self, params):
    """Store `params` and open the partial output file.

    params -- kept as self.params

    The file name is REDUCE_OUTPUT formatted with this_partition().
    Data is written to "<fname>.partial"; nothing in this method
    moves it to its final name.
    """
    out_path = REDUCE_OUTPUT % this_partition()
    self.fname = out_path
    self.params = params
    # NOTE(review): presumably creates the missing directories for
    # out_path -- confirm against ensure_path().
    ensure_path(out_path, False)
    partial_path = out_path + ".partial"
    self.fd = file(partial_path, "w")