Пример #1
0
def result_iterator(results, notifier = None, proxy = None):
    """Yield every entry read from the given result addresses.

    Each address in `results` that starts with "dir://" is expanded with
    util.parse_dir(); any other address is used as is. Every resulting
    url is then opened (locally for "file://", over HTTP otherwise) and
    its contents are decoded with func.netstr_reader().

    Parameters:
        results  -- iterable of result addresses (strings).
        notifier -- optional callable, invoked with the url just before
                    its entries are read.
        proxy    -- optional proxy address. Falls back to the DISCO_PROXY
                    environment variable. A "disco://" prefix is replaced
                    by "<host>:<util.MASTER_PORT>"; a "http://" prefix is
                    stripped.

    Raises:
        IOError -- if an HTTP request is answered with a status other
                   than 200.
    """
    if not proxy:
        proxy = os.environ.get("DISCO_PROXY", None)
    if proxy:
        if proxy.startswith("disco://"):
            proxy = "%s:%s" % (proxy[8:], util.MASTER_PORT)
        elif proxy.startswith("http://"):
            proxy = proxy[7:]

    # Flatten directory addresses into a plain list of urls up front.
    res = []
    for dir_url in results:
        if dir_url.startswith("dir://"):
            res += util.parse_dir(dir_url, proxy)
        else:
            res.append(dir_url)

    for url in res:
        http = None
        fd = None
        # try/finally guarantees the handle is released even when the
        # request fails, the reader raises, or the consumer stops
        # iterating before the input is exhausted.
        try:
            if url.startswith("file://"):
                fname = url[7:]
                fd = open(fname)
                sze = os.stat(fname).st_size
            else:
                # Skips an 8-character scheme prefix -- presumably
                # "disco://"; verify against the producers of these urls.
                host, fname = url[8:].split("/", 1)
                if proxy:
                    ext_host = proxy
                    # NOTE(review): together with the "/" added below the
                    # proxied path starts with "//" -- confirm intended.
                    fname = "/disco/node/%s/%s" % (host, fname)
                else:
                    ext_host = host + ":" + util.HTTP_PORT
                ext_file = "/" + fname

                http = httplib.HTTPConnection(ext_host)
                http.request("GET", ext_file, "")
                fd = http.getresponse()
                if fd.status != 200:
                    # String exceptions are not valid; raise a real one.
                    raise IOError("HTTP error %d" % fd.status)

                sze = int(fd.getheader("content-length"))

            if notifier:
                notifier(url)

            for x in func.netstr_reader(fd, sze, fname):
                yield x
        finally:
            if http is not None:
                http.close()
            elif fd is not None:
                fd.close()
Пример #2
0
        def download_and_sort(self):
                """Download all reduce inputs into one file and sort it externally.

                Every (key, value) pair read from self.inputs is written
                to a ".partial" file as "<key> <value>\\0"; the file is
                renamed to its final name once all inputs have been
                written. The external `sort` command then orders the
                records and the sorted file is handed to
                self.multi_file_iterator().

                Keys containing a space and values containing a zero byte
                are reported through err(), since those characters are the
                field and record separators of the intermediate file.

                Returns whatever self.multi_file_iterator() returns for
                the sorted file.
                """
                dlname = REDUCE_DL % (job_name, this_partition())
                ensure_path(dlname, False)
                msg("Reduce will be downloaded to %s" % dlname)
                partial_name = dlname + ".partial"
                out_fd = open(partial_name, "w")
                # Close the partial file even if reading or validating an
                # input fails, so the descriptor is not leaked.
                try:
                        for fname in self.inputs:
                                sze, fd = connect_input(fname)
                                for k, v in netstr_reader(fd, sze, fname):
                                        if " " in k:
                                                err("Spaces are not allowed in keys "\
                                                    "with external sort.")
                                        if "\0" in v:
                                                err("Zero bytes are not allowed in "\
                                                    "values with external sort. "\
                                                    "Consider using base64 encoding.")
                                        out_fd.write("%s %s\0" % (k, v))
                finally:
                        out_fd.close()
                os.rename(partial_name, dlname)
                msg("Reduce input downloaded ok")

                msg("Starting external sort")
                sortname = REDUCE_SORTED % (job_name, this_partition())
                ensure_path(sortname, False)
                # Stable numeric sort on the first space-separated field
                # of zero-terminated records, written to sortname.
                cmd = ["sort", "-n", "-s", "-k", "1,1", "-z",\
                        "-t", " ", "-o", sortname, dlname]

                ret = subprocess.call(cmd)
                if ret:
                        err("Sorting %s to %s failed (%d)" %\
                                (dlname, sortname, ret))

                msg("External sort done: %s" % sortname)
                return self.multi_file_iterator([sortname], reader =\
                        lambda fd, sze, fname:\
                                re_reader("(.*?) (.*?)\000", fd, sze, fname))
Пример #3
0
def json_reader(fd, size, filename):
    """Yield (key, value) pairs from netstr input, each decoded with json.loads.

    The arguments are passed unchanged to disco.func.netstr_reader.
    """
    from disco.func import netstr_reader
    from discodex import json

    for raw_key, raw_value in netstr_reader(fd, size, filename):
        key = json.loads(raw_key)
        value = json.loads(raw_value)
        yield key, value
Пример #4
0
def netstrparse(fd, size, fname, params):
    """Read (key, value) pairs straight from `netstr` input.

    Delegates to disco.func.netstr_reader with the same four arguments
    and returns its result.
    """
    from disco.func import netstr_reader

    entries = netstr_reader(fd, size, fname, params)
    return entries
Пример #5
0
def reader(fd, size, fname, extra):
    """Yield netstr (key, value) pairs with `extra` appended to each key.

    fd, size and fname are passed unchanged to disco.func.netstr_reader.
    """
    from disco.func import netstr_reader

    entries = netstr_reader(fd, size, fname)
    for key, value in entries:
        tagged_key = key + extra
        yield tagged_key, value
Пример #6
0
def reader(fd, size, fname, extra):
    """Read netstr input and yield each pair as (key + extra, value).

    The pairs come from disco.func.netstr_reader(fd, size, fname).
    """
    from disco.func import netstr_reader

    for item_key, item_value in netstr_reader(fd, size, fname):
        suffixed = item_key + extra
        yield suffixed, item_value