Example #1: _function_wrapper — run one job and shelve its result
import re
import shelve

import utils  # project-local helpers (readable_call, func_exec)


def _function_wrapper(args_package):
    """Allow multiprocessing Pool to call generic functions/args/kwargs.

    Data are saved here rather than handed back to avoid the scenario where
    all of the output from a batch run is held in memory.
    """
    (identifier, directory, funcname, args, kwargs) = args_package

    readable = utils.readable_call(funcname, args, kwargs)
    filename = "%s/%s.shelve" % (directory, identifier)
    filename = re.sub('/+', '/', filename)
    print "%s -> %s" % (readable, filename)

    result = utils.func_exec(funcname, args, kwargs, printcall=False)

    outfile = shelve.open(filename, 'n', protocol=-1)
    outfile["identifier"] = identifier
    outfile["filename"] = filename
    outfile["funcname"] = funcname
    outfile["args"] = args
    outfile["kwargs"] = kwargs
    outfile["result"] = result
    outfile.close()

    return identifier
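
For context, a minimal sketch of how a wrapper like this might be driven from a multiprocessing.Pool. The square worker, the ./cache directory, and the job identifiers are illustrative assumptions, not part of the original module, and whether the function slot takes a callable or a name string depends on the project-local utils.func_exec:

import multiprocessing

def square(x):  # hypothetical worker, for illustration only
    return x * x

if __name__ == "__main__":
    # one package per job: (identifier, output directory, function, args,
    # kwargs); the ./cache directory is assumed to exist already
    packages = [("job%d" % num, "./cache", square, (num,), {})
                for num in range(4)]

    pool = multiprocessing.Pool(processes=2)
    # only the identifiers come back; results land in the per-job shelve files
    identifiers = pool.map(_function_wrapper, packages)
    pool.close()
    pool.join()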
Example #2: scatter — queue a tagged job for the worker pool
    def scatter(self, *args, **kwargs):
        if "execute_key" not in kwargs:
            print "need to identify an execution key for the task"
            return

        rehashed = [_make_serializable(item) for item in args]

        argpkl = pickle.dumps(
            (self.funcname, rehashed, tuple(sorted(kwargs.items()))), -1)

        identifier = hashlib.sha224(argpkl).hexdigest()
        readable = utils.readable_call(self.funcname, rehashed, kwargs)

        # pop the execute_key kwarg: it only tags the output with an ID and
        # must not be passed through to the wrapped function call.
        execute_key = kwargs["execute_key"]
        del kwargs["execute_key"]

        jobfile_name = "%s/%s.job" % (pd.job_directory, identifier)
        donefile_name = "%s/%s.done" % (pd.job_directory, identifier)

        # first remove any lurking completed jobs
        try:
            os.remove(donefile_name)
            os.remove(jobfile_name)
        except OSError:
            if self.verbose:
                print "all clear: ", donefile_name

        job_shelve = shelve.open(jobfile_name, "n", protocol=-1)
        job_shelve['funcname'] = self.funcname
        job_shelve['args'] = args
        job_shelve['kwargs'] = kwargs
        job_shelve['tag'] = execute_key
        job_shelve['identifier'] = identifier
        job_shelve['call'] = readable

        #print job_shelve
        job_shelve.close()

        self.call_stack.append(identifier)
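
A hedged usage sketch: whatever object exposes this scatter method (the dispatcher handle and its constructor below are assumed names, not from the source) must be called with an execute_key keyword, which tags the job for later collection and is stripped before the call is recorded:

# hypothetical handle that exposes the scatter interface above
dispatcher = TaskInterface(funcname="process_chunk")  # assumed constructor
dispatcher.scatter(chunk_a, threshold=0.5, execute_key="run_a")
dispatcher.scatter(chunk_b, threshold=0.5, execute_key="run_b")
# each call writes a .job shelve and appends its SHA-224 identifier
# to dispatcher.call_stack for a later gather/collect step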
Example #3: memoize — disk-backed cache guarded by busy/done flag files
    def memoize(*args, **kwargs):
        funcname = func.__name__
        rehashed = [_make_serializable(item) for item in args]

        argpkl = pickle.dumps(
            (funcname, rehashed, tuple(sorted(kwargs.items()))), -1)

        identifier = hashlib.sha224(argpkl).hexdigest()

        readable = utils.readable_call(funcname, rehashed, kwargs)
        filename = "%s/%s.shelve" % (memoize_directory, identifier)
        filename = re.sub('/+', '/', filename)
        print "%s -> %s" % (readable, filename)

        done_filename = filename + ".done"
        busy_filename = filename + ".busy"

        # stagger start-up to reduce (not eliminate) the race where many
        # processes try to create the same new cache file at once
        time.sleep(random.uniform(0, 0.5))

        # if the result is cached or being calculated elsewhere,
        if os.access(done_filename, os.F_OK) or \
           os.access(busy_filename, os.F_OK):
            printed = False
            # wait for the other calculation process to finish
            while (not os.access(done_filename, os.F_OK)):
                time.sleep(1.)
                if not printed:
                    print "waiting for %s" % filename
                    printed = True

            # if many threads were waiting to read, space their reads
            time.sleep(random.uniform(0, 0.5))

            print "ready to read %s" % filename
            try:
                input_shelve = shelve.open(filename, "r", protocol=-1)
                retval = input_shelve['result']
                input_shelve.close()
                print "used cached value %s" % filename
            except Exception:
                # fail loudly with the offending filename, not a bare error
                raise ValueError("could not read cached result from %s" %
                                 filename)
        else:
            # first flag the cachefile as busy so other threads wait
            busyfile = open(busy_filename, "w")
            busyfile.write("working")
            busyfile.close()

            # recalculate the function
            print "no cache, recalculating %s" % filename
            start = time.time()
            retval = func(*args, **kwargs)

            outfile = shelve.open(filename, "n", protocol=-1)
            outfile["signature"] = identifier
            outfile["filename"] = filename
            outfile["funcname"] = funcname
            outfile["args"] = rehashed
            outfile["kwargs"] = kwargs
            outfile["result"] = retval
            outfile.close()
            time.sleep(0.2)

            # indicate that the function is done being recalculated
            donefile = open(done_filename, "w")
            donefile.write("%10.15f\n" % (time.time() - start))
            donefile.close()

            # remove the busy flag
            os.remove(busy_filename)

        return retval
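
As written, memoize is the inner function of a decorator: func and memoize_directory are free variables it closes over. A minimal sketch of the assumed enclosing wrapper (the outer name persistent_memo and the example function are hypothetical; the source does not show them):

memoize_directory = "./memoize_cache"  # assumed module-level setting

def persistent_memo(func):  # hypothetical outer decorator
    def memoize(*args, **kwargs):
        # ... body exactly as in the example above ...
        return func(*args, **kwargs)
    return memoize

@persistent_memo
def expensive_model(x):
    return x ** 2  # stands in for a slow computation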