Пример #1
0
 def __init__(self, part, params, combiner = None):
         """Open the partial map output file for one partition.

         part     -- partition identifier; used to build the output file
                     name and stored as self.part.
         params   -- stored as self.params; not interpreted here.
         combiner -- optional combiner, stored as self.combiner
                     (None by default).

         The output name is MAP_OUTPUT % (this_partition(), part). Data is
         written to "<name>.partial"; the open handle is kept in self.fd.
         Errors raised while opening the file propagate to the caller.
         """
         self.combiner = combiner
         self.params = params
         # Starts out empty; nothing is buffered at construction time.
         self.comb_buffer = {}
         self.fname = MAP_OUTPUT % (this_partition(), part)
         # NOTE(review): presumably prepares the directory that will hold
         # self.fname -- confirm against ensure_path's definition.
         ensure_path(self.fname, False)
         # open() is the documented way to create file objects; the
         # file() constructor is discouraged and is gone in Python 3.
         self.fd = open(self.fname + ".partial", "w")
         self.part = part
Пример #2
0
        def download_and_sort(self):
                """Download every reduce input into one file and sort it externally.

                Each (key, value) pair read from self.inputs is written to
                "<dlname>.partial" as "key value\\0" (space-separated,
                NUL-terminated). Once all inputs are written, the file is
                renamed to dlname and sorted with the external `sort`
                command into the file named by REDUCE_SORTED.

                Keys containing a space and values containing a NUL byte are
                reported through err(), since they would break the record
                format above.

                Returns whatever self.multi_file_iterator yields for the
                sorted file, read back with a regex-based reader that splits
                records on the first space and the terminating NUL.
                """
                dlname = REDUCE_DL % this_partition()
                ensure_path(dlname, False)
                msg("Reduce will be downloaded to %s" % dlname)
                out_fd = open(dlname + ".partial", "w")
                # try/finally so the handle is released even when reading an
                # input, err() or a write raises half-way through.
                try:
                        for fname in self.inputs:
                                sze, fd = connect_input(fname)
                                for k, v in fun_reduce_reader(fd, sze, fname):
                                        # A space is the field separator.
                                        if " " in k:
                                                err("Spaces are not allowed in keys "\
                                                    "with external sort.")
                                        # NUL is the record terminator.
                                        if "\0" in v:
                                                err("Zero bytes are not allowed in "\
                                                    "values with external sort. "\
                                                    "Consider using base64 encoding.")
                                        out_fd.write("%s %s\0" % (k, v))
                finally:
                        out_fd.close()
                # Only a fully written download gets the final name.
                os.rename(dlname + ".partial", dlname)
                msg("Reduce input downloaded ok")

                msg("Starting external sort")
                sortname = REDUCE_SORTED % this_partition()
                ensure_path(sortname, False)
                # -z: NUL-terminated records; -t " ": space-separated fields;
                # -k 1,1: sort on the key field only.
                # NOTE(review): -n compares keys numerically -- confirm this
                # is intended for non-numeric keys.
                cmd = ["sort", "-n", "-k", "1,1", "-z",\
                        "-t", " ", "-o", sortname, dlname]

                proc = subprocess.Popen(cmd)
                ret = proc.wait()
                if ret:
                        err("Sorting %s to %s failed (%d)" %\
                                (dlname, sortname, ret))

                msg("External sort done: %s" % sortname)
                return self.multi_file_iterator([sortname], reader =\
                        lambda fd, sze, fname:\
                                re_reader("(?s)(.*?) (.*?)\000", fd, sze, fname))
Пример #3
0
 def __init__(self, params):
         """Open the partial reduce output file for this partition.

         params -- stored as self.params; not interpreted here.

         The output name is REDUCE_OUTPUT % this_partition(). Data is
         written to "<name>.partial"; the open handle is kept in self.fd.
         Errors raised while opening the file propagate to the caller.
         """
         self.fname = REDUCE_OUTPUT % this_partition()
         self.params = params
         # NOTE(review): presumably prepares the directory that will hold
         # self.fname -- confirm against ensure_path's definition.
         ensure_path(self.fname, False)
         # open() is the documented way to create file objects; the
         # file() constructor is discouraged and is gone in Python 3.
         self.fd = open(self.fname + ".partial", "w")