Пример #1
0
    def _run(self):
        """Run the reduce function over this task's entries and publish the result.

        Steps, in order:

        1. Wrap ``self`` with ``track_status`` to obtain the entry iterator
           and connect the output via ``connect_output``.
        2. If ``self.ext_reduce`` is set, prepare the external reducer and
           replace ``self.reduce`` with a function built from
           ``external.ext_reduce``'s code object.
        3. Report the summed size of ``self.connected_inputs`` via ``Status``.
        4. Call ``self.init`` and then ``self.reduce``. A reduce function
           taking fewer than three arguments is iterated and each ``(k, v)``
           pair it yields is added to the output; otherwise the reduce
           function is handed the output object and writes to it itself.
        5. Close the output and the external interface.
        6. If ``self.save`` is set, push the results with ``util.ddfs_save``;
           otherwise write a ``"<id> <out_url>"`` line to the reduce index
           file. In both cases the resulting location is announced through
           ``OutputURL``.
        """
        entries = self.track_status(self, "%s entries reduced")
        red_out, out_url, fd_list = self.connect_output()
        params = self.params

        if self.ext_reduce:
            external.prepare(self.reduce, self.ext_params, self.path('ext.reduce'))
            # Rebuild the external reducer as a function whose globals are the
            # `external` module's namespace, then install it as self.reduce.
            self.reduce = FunctionType(external.ext_reduce.func_code,
                                       globals=external.__dict__)
            self.insert_globals([self.reduce])

        total_size = sum(size for fd, size, url in self.connected_inputs)
        Status("Input is %s" % (util.format_size(total_size)))

        self.init(entries, params)
        if util.argcount(self.reduce) < 3:
            # Iterator-style reducer: it yields pairs and we add them.
            for k, v in self.reduce(entries, params):
                red_out.add(k, v)
        else:
            # Reducer that receives the output object and writes directly.
            self.reduce(entries, red_out, params)

        self.close_output(fd_list)
        external.close_ext()

        if self.save:
            OutputURL(util.ddfs_save(self.blobs, self.jobname, self.master))
            Status("Results pushed to DDFS")
        else:
            index, index_url = self.reduce_index
            f = file(index, 'w')
            try:
                print >> f, '%d %s' % (self.id, out_url)
                sync(f)
            finally:
                # Release the handle even if writing or syncing fails.
                f.close()
            OutputURL(index_url)
Пример #2
0
 def reduce(self, task, params):
     """Feed the task's reduce input through the configured reduce function.

     The input from ``reduce_input`` is wrapped by ``status_iter`` and the
     last fd of the task's output file is used as the output object.
     ``self['reduce_init']`` is called first. A reduce function taking
     fewer than three arguments is iterated and every record it yields is
     unpacked into ``add`` on the output; otherwise the reduce function
     receives the output object and writes to it itself.
     """
     sorted_input = self.reduce_input(task, params)
     tracked = self.status_iter(sorted_input, "%s entries reduced")

     out_opener = self.opener('reduce', 'out', params)
     task_output = self.output(task, None, open=out_opener)
     sink = task_output.file.fds[-1]

     self['reduce_init'](tracked, params)

     # Reducer that takes the output object and writes directly.
     if util.argcount(self['reduce']) >= 3:
         self['reduce'](tracked, sink, params)
         return

     # Iterator-style reducer: collect what it yields.
     for record in self['reduce'](tracked, params):
         sink.add(*record)
Пример #3
0
 def reduce(self, task, params):
     """Feed the task's reduce input through the configured reduce function.

     The input from ``reduce_input`` is wrapped by ``status_iter`` and the
     last fd of the task's output file is used as the output object.
     ``self['reduce_init']`` is called first. A reduce function taking
     fewer than three arguments is iterated and every record it yields is
     unpacked into ``add`` on the output; otherwise the reduce function
     receives the output object and writes to it itself.
     """
     sorted_input = self.reduce_input(task, params)
     tracked = self.status_iter(sorted_input, "%s entries reduced")

     out_opener = self.opener('reduce', 'out', params)
     task_output = self.output(task, None, open=out_opener)
     sink = task_output.file.fds[-1]

     self['reduce_init'](tracked, params)

     # Reducer that takes the output object and writes directly.
     if util.argcount(self['reduce']) >= 3:
         self['reduce'](tracked, sink, params)
         return

     # Iterator-style reducer: collect what it yields.
     for record in self['reduce'](tracked, params):
         sink.add(*record)
Пример #4
0
 def __init__(self, url, streams, params, fd=None, size=None):
     """Chain ``streams`` together, recording the fd produced at each step.

     Each stream is called with the current ``fd``, ``size`` and ``url``,
     plus ``params`` when the stream accepts four arguments. A stream may
     return a bare fd, a ``(fd, url)`` pair or a ``(fd, size, url)``
     triple; whatever it updates is passed on to the next stream. Every
     resulting fd is appended to ``self.fds``.
     """
     self.fds = []
     for stream in streams:
         if util.argcount(stream) == 4:
             result = stream(fd, size, url, params)
         else:
             result = stream(fd, size, url)

         if not isinstance(result, tuple):
             fd = result
         elif len(result) == 3:
             fd, size, url = result
         else:
             fd, url = result
         self.fds.append(fd)
Пример #5
0
 def __init__(self, url, streams, params, fd=None, size=None):
     """Chain ``streams`` together, recording the fd produced at each step.

     Each stream is called with the current ``fd``, ``size`` and ``url``,
     plus ``params`` when the stream accepts four arguments. A stream may
     return a bare fd, a ``(fd, url)`` pair or a ``(fd, size, url)``
     triple; whatever it updates is passed on to the next stream. Every
     resulting fd is appended to ``self.fds``.
     """
     self.fds = []
     for stream in streams:
         if util.argcount(stream) == 4:
             result = stream(fd, size, url, params)
         else:
             result = stream(fd, size, url)

         if not isinstance(result, tuple):
             fd = result
         elif len(result) == 3:
             fd, size, url = result
         else:
             fd, url = result
         self.fds.append(fd)
Пример #6
0
    def connect_input(self, url, fd=None, size=None):
        """Open ``url`` by passing it through every configured input stream.

        Each entry of ``self.input_stream`` is called with the current
        ``fd``, ``size``, ``url`` and ``self.params``. If a stream returns a
        tuple it is used as the new ``(fd, size, url)``; a non-tuple result
        is taken as the new fd while size and url carry over. When
        ``self.reader`` is set it is applied last, with or without
        ``self.params`` depending on its argument count.

        Returns the final ``(fd, size, url)``, or the reader's own tuple if
        it returned one.
        """
        def with_defaults(result, *defaults):
            # Tuples pass through untouched; a bare value is padded with
            # the supplied defaults.
            if not isinstance(result, tuple):
                return (result,) + defaults
            return result

        for stream in self.input_stream:
            opened = stream(fd, size, url, self.params)
            fd, size, url = with_defaults(opened, size, url)

        # backwards compatibility for readers
        reader = self.reader
        if not reader:
            return fd, size, url
        if util.argcount(reader) == 3:
            produced = reader(fd, size, url)
        else:
            produced = reader(fd, size, url, self.params)
        return with_defaults(produced, size, url)
Пример #7
0
    def connect_input(self, url, fd=None, size=None):
        """Open ``url`` by passing it through every configured input stream.

        Each entry of ``self.input_stream`` is called with the current
        ``fd``, ``size``, ``url`` and ``self.params``. If a stream returns a
        tuple it is used as the new ``(fd, size, url)``; a non-tuple result
        is taken as the new fd while size and url carry over. When
        ``self.reader`` is set it is applied last, with or without
        ``self.params`` depending on its argument count.

        Returns the final ``(fd, size, url)``, or the reader's own tuple if
        it returned one.
        """
        def with_defaults(result, *defaults):
            # Tuples pass through untouched; a bare value is padded with
            # the supplied defaults.
            if not isinstance(result, tuple):
                return (result,) + defaults
            return result

        for stream in self.input_stream:
            opened = stream(fd, size, url, self.params)
            fd, size, url = with_defaults(opened, size, url)

        # backwards compatibility for readers
        reader = self.reader
        if not reader:
            return fd, size, url
        if util.argcount(reader) == 3:
            produced = reader(fd, size, url)
        else:
            produced = reader(fd, size, url, self.params)
        return with_defaults(produced, size, url)