Example #1
    def commit_result(self, block_store, master_proxy):
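        # Publish any extra refs, optionally save the continuation, and bind
        # this task's result to its first expected output at the master.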
        
        commit_bindings = {}
        for ref in self.additional_refs_to_publish:
            commit_bindings[ref.id] = ref
        
        if self.result is None:
            # The task produced no result: just save the continuation (if
            # requested) and commit the bindings.
            if self.save_continuation:
                save_cont_uri, _ = block_store.store_object(self.continuation, 'pickle', self.get_saved_continuation_object_id())
            else:
                save_cont_uri = None
            master_proxy.commit_task(self.task_id, commit_bindings, save_cont_uri, self.replay_uuid_list)
            return
        
        serializable_result = map_leaf_values(self.convert_tasklocal_to_real_reference, self.result)

        _, size_hint = block_store.store_object(serializable_result, 'json', self.expected_outputs[0])
        if size_hint < 128:
            # Small results are inlined directly into the reference as a data value.
            result_ref = SWDataValue(self.expected_outputs[0], serializable_result)
        else:
            result_ref = SW2_ConcreteReference(self.expected_outputs[0], size_hint)
            result_ref.add_location_hint(block_store.netloc)
            
        commit_bindings[self.expected_outputs[0]] = result_ref
        
        if self.save_continuation:
            save_cont_uri, _ = block_store.store_object(self.continuation, 'pickle', self.get_saved_continuation_object_id())
        else:
            save_cont_uri = None
        
        master_proxy.commit_task(self.task_id, commit_bindings, save_cont_uri, self.replay_uuid_list)
Example #2
    def default(self, id):
        safe_id = id
        if cherrypy.request.method == 'GET':
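            # GET: serve the block, preferring the in-progress streaming file
            # if the block is still being written.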
            is_streaming, filename = self.block_store.maybe_streaming_filename(
                safe_id)
            if is_streaming:
                cherrypy.response.headers['Pragma'] = 'streaming'
            try:
                response_body = serve_file(filename)
                return response_body
            except cherrypy.HTTPError as he:
                # The streaming file might have been deleted between calls to maybe_streaming_filename
                # and serve_file. Try again, because this time the non-streaming filename should be
                # available.
                if he.status == 404:
                    if not is_streaming:
                        raise
                    cherrypy.response.headers.pop('Pragma', None)
                    is_streaming, filename = self.block_store.maybe_streaming_filename(
                        safe_id)
                    try:
                        return serve_file(filename)
                    except cherrypy.HTTPError as he:
                        if he.status == 416:
                            raise cherrypy.HTTPError(418)
                        else:
                            raise
                elif he.status == 416:
                    raise cherrypy.HTTPError(418)
                else:
                    raise

        elif cherrypy.request.method == 'POST':
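            # POST: store the uploaded body as a new block, optionally
            # mirroring it via the backup sender.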
            if self.backup_sender is not None:
                request_body = cherrypy.request.body.read()
                url = self.block_store.store_raw_file(
                    StringIO.StringIO(request_body), safe_id)
                self.backup_sender.add_data(safe_id, request_body)
            else:
                url = self.block_store.store_raw_file(cherrypy.request.body,
                                                      safe_id)
            if self.task_pool is not None:
                self.task_pool.publish_refs({
                    safe_id:
                    SW2_ConcreteReference(safe_id, None,
                                          [self.block_store.netloc])
                })
            return simplejson.dumps(url)

        elif cherrypy.request.method == 'HEAD':
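            # HEAD: succeed silently if the block exists locally.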
            if os.path.exists(self.block_store.filename(safe_id)):
                return
            else:
                raise cherrypy.HTTPError(404)

        else:
            raise cherrypy.HTTPError(405)
Example #3
    def spawn_all(self, block_store, master_proxy):
        current_batch = []

        if len(self.spawn_list) == 0:
            return

        current_index = 0
        while current_index < len(self.spawn_list):

            must_wait = False

            if self.spawn_list[current_index].ignore:
                current_index += 1
                continue

            current_cont = self.spawn_list[current_index].continuation

            # NOTE: must_wait is never reassigned above, so this branch is
            # currently dead and the whole batch is fired after the loop.
            if must_wait:

                if not self.is_running:
                    return

                # Fire off the current batch.
                master_proxy.spawn_tasks(self.task_id, current_batch)

                # Iterate again on the same index.
                current_batch = []
                continue

            else:

                # Store the continuation and add it to the task descriptor.
                if current_cont is not None:
                    spawned_cont_id = self.get_spawn_continuation_object_id(self.spawn_list[current_index].id)
                    _, size_hint = block_store.store_object(current_cont, 'pickle', spawned_cont_id)
                    spawned_cont_ref = SW2_ConcreteReference(spawned_cont_id, size_hint)
                    spawned_cont_ref.add_location_hint(block_store.netloc)
                    self.spawn_list[current_index].task_descriptor['dependencies']['_cont'] = spawned_cont_ref
                    self.maybe_also_publish(spawned_cont_ref)

                # Current task is now ready to be spawned.
                current_batch.append(self.spawn_list[current_index].task_descriptor)
                current_index += 1

        if len(current_batch) > 0:

            if not self.is_running:
                return

            # Fire off the current batch.
            master_proxy.spawn_tasks(self.task_id, current_batch)
Example #4
    def index(self):
        if cherrypy.request.method == 'POST':
            id = self.block_store.allocate_new_id()
            if self.backup_sender is not None:
                request_body = cherrypy.request.body.read()
                self.block_store.store_raw_file(
                    StringIO.StringIO(request_body), id)
                self.backup_sender.add_data(id, request_body)
            else:
                self.block_store.store_raw_file(cherrypy.request.body, id)
            if self.task_pool is not None:
                self.task_pool.publish_refs({
                    id:
                    SW2_ConcreteReference(id, None, [self.block_store.netloc])
                })
            return simplejson.dumps(id)
        elif cherrypy.request.method == 'GET':
            return serve_file(self.block_store.generate_block_list_file())
        else:
            raise cherrypy.HTTPError(405)
Example #5
    def spawn_exec_func(self, executor_name, exec_args, num_outputs):
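        # Build a task descriptor for executor_name applied to exec_args and
        # queue it for spawning; return task-local futures for its outputs.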
        
        new_task_id = self.create_spawned_task_name()
        inputs = {}
        
        args = self.do_eager_thunks(exec_args)

        args_id, expected_output_ids = self.create_names_for_exec(executor_name, args, num_outputs)
        ret = [self.continuation.create_tasklocal_reference(SW2_FutureReference(expected_output_ids[i])) for i in range(num_outputs)]

        def args_check_mapper(leaf):
            # Swap each task-local reference for a numbered placeholder, and
            # record the real reference as the corresponding task input.
            if isinstance(leaf, SWLocalReference):
                real_ref = self.continuation.resolve_tasklocal_reference_with_ref(leaf)
                i = len(inputs)
                inputs[i] = real_ref
                return SWLocalReference(i)
            return leaf
        
        transformed_args = map_leaf_values(args_check_mapper, args)
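        # Store the transformed argument structure so the spawned task can
        # fetch it as its '_args' dependency.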
        _, size_hint = self.block_store.store_object(transformed_args, 'pickle', args_id)
        args_ref = SW2_ConcreteReference(args_id, size_hint)
        self.spawn_exec_counter += 1
        args_ref.add_location_hint(self.block_store.netloc)
        self.maybe_also_publish(args_ref)
        
        inputs['_args'] = args_ref

        task_descriptor = {'task_id': new_task_id,
                           'handler': executor_name, 
                           'dependencies': inputs,
                           'expected_outputs': expected_output_ids}
        
        self.spawn_list.append(SpawnListEntry(new_task_id, task_descriptor))
        
        # Flush the spawn list in batches, so that it does not grow without bound.
        if len(self.spawn_list) > 20:
            self.spawn_all(self.block_store, self.master_proxy)
            self.spawn_list = []
        
        return ret
Example #6
def main():
    parser = OptionParser()
    parser.add_option("-m",
                      "--master",
                      action="store",
                      dest="master",
                      help="Master URI",
                      metavar="MASTER",
                      default=os.getenv("SW_MASTER"))
    parser.add_option("-s",
                      "--size",
                      action="store",
                      dest="size",
                      help="Block size in bytes",
                      metavar="N",
                      type="int",
                      default=None)
    parser.add_option("-n",
                      "--num-blocks",
                      action="store",
                      dest="count",
                      help="Number of blocks",
                      metavar="N",
                      type="int",
                      default=1)
    parser.add_option("-r",
                      "--replication",
                      action="store",
                      dest="replication",
                      help="Copies of each block",
                      type="int",
                      metavar="N",
                      default=1)
    parser.add_option("-d",
                      "--delimiter",
                      action="store",
                      dest="delimiter",
                      help="Block delimiter character",
                      metavar="CHAR",
                      default=None)
    parser.add_option("-l",
                      "--lines",
                      action="store_const",
                      dest="delimiter",
                      const="\n",
                      help="Use newline as block delimiter")
    parser.add_option("-p",
                      "--packet-size",
                      action="store",
                      dest="packet_size",
                      help="Upload packet size in bytes",
                      metavar="N",
                      type="int",
                      default=1048576)
    parser.add_option("-i",
                      "--id",
                      action="store",
                      dest="name",
                      help="Block name prefix",
                      metavar="NAME",
                      default=None)
    parser.add_option("-u",
                      "--urls",
                      action="store_true",
                      dest="urls",
                      help="Treat files as containing lists of URLs",
                      default=False)
    (options, args) = parser.parse_args()

    workers = get_worker_netlocs(options.master)

    name_prefix = create_name_prefix(options.name)

    output_references = []

    # Upload the data in extents.
    if not options.urls:
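        # A single input file is split into extents; otherwise each named
        # file is uploaded as one block.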

        if len(args) == 1:
            input_filename = args[0]
            extent_list = build_extent_list(input_filename, options)

            with open(input_filename, 'rb') as input_file:
                for i, (start, finish) in enumerate(extent_list):
                    targets = select_targets(workers, options.replication)
                    block_name = make_block_id(name_prefix, i)
                    print >> sys.stderr, 'Uploading %s to (%s)' % (
                        block_name, ",".join(targets))
                    upload_extent_to_targets(input_file, block_name, start,
                                             finish, targets,
                                             options.packet_size)
                    conc_ref = SW2_ConcreteReference(block_name,
                                                     finish - start, targets)
                    output_references.append(conc_ref)

        else:

            for i, input_filename in enumerate(args):
                with open(input_filename, 'rb') as input_file:
                    targets = select_targets(workers, options.replication)
                    block_name = make_block_id(name_prefix, i)
                    block_size = os.path.getsize(input_filename)
                    print >> sys.stderr, 'Uploading %s to (%s)' % (
                        input_filename, ",".join(targets))
                    upload_extent_to_targets(input_file, block_name, 0,
                                             block_size, targets,
                                             options.packet_size)
                    conc_ref = SW2_ConcreteReference(block_name, block_size,
                                                     targets)
                    output_references.append(conc_ref)

    else:
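        # URL mode: ask the target workers to fetch each listed URL as a block.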

        urls = []
        for filename in args:
            with open(filename, 'r') as f:
                for line in f:
                    urls.append(line.strip())

        # Map each target worker to the list of references it should fetch.
        target_fetch_lists = {}

        for i, url in enumerate(urls):
            targets = select_targets(workers, options.replication)
            block_name = make_block_id(name_prefix, i)
            ref = SW2_FetchReference(block_name, url, i)
            for target in targets:
                try:
                    tfl = target_fetch_lists[target]
                except KeyError:
                    tfl = []
                    target_fetch_lists[target] = tfl
                tfl.append(ref)
            h = httplib2.Http()
            print >> sys.stderr, 'Getting size of %s' % url
            response, _ = h.request(url, 'HEAD')
            try:
                size = int(response['content-length'])
            except KeyError:
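                # No Content-Length header: fall back to a 1 MiB size hint.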
                size = 1048576
            conc_ref = SW2_ConcreteReference(block_name, size, targets)
            output_references.append(conc_ref)

        # Track which targets are still fetching, and which have failed outright.
        pending_targets = {}
        failed_targets = set()

        for target, tfl in target_fetch_lists.items():
            h2 = httplib2.Http()
            print >> sys.stderr, 'Uploading to %s' % target
            id = uuid.uuid4()
            response, _ = h2.request(
                'http://%s/fetch/%s' % (target, id), 'POST',
                simplejson.dumps(tfl, cls=SWReferenceJSONEncoder))
            if response.status != 202:
                print >> sys.stderr, 'Failed... %s' % target
                failed_targets.add(target)
            else:
                pending_targets[target] = id

        while True:

            # Wait until we get a non-try-again response from all of the targets.
            while len(pending_targets) > 0:
                time.sleep(3)
                for target, id in list(pending_targets.items()):
                    try:
                        response, _ = h2.request(
                            'http://%s/fetch/%s' % (target, id), 'GET')
                        if response.status == 408:
                            print >> sys.stderr, 'Continuing to wait for %s' % target
                            continue
                        elif response.status == 200:
                            print >> sys.stderr, 'Succeeded! %s' % target
                            del pending_targets[target]
                        else:
                            print >> sys.stderr, 'Failed... %s' % target
                            del pending_targets[target]
                            failed_targets.add(target)
                    except Exception:
                        # Any error while polling counts as a failed target.
                        print >> sys.stderr, 'Failed... %s' % target
                        del pending_targets[target]
                        failed_targets.add(target)

            if len(pending_targets) == 0 and len(failed_targets) == 0:
                break

            # All transfers have finished or failed, so check for failures.
            if len(failed_targets) > 0:

                # Redistribute blocks to working workers.
                redistribute_refs = {}

                for target in failed_targets:
                    for ref in target_fetch_lists[target]:
                        redistribute_refs[ref.id] = ref

                target_fetch_lists = {}

                # Refetch the worker list, in case any workers have failed in the meantime.
                workers = get_worker_netlocs(options.master)

                for ref in redistribute_refs.values():
                    targets = select_targets(workers, options.replication)
                    # Repoint each output reference at the freshly chosen targets,
                    # reusing the size hint from the original reference.
                    output_references[ref.index] = SW2_ConcreteReference(
                        ref.id, output_references[ref.index].size_hint, targets)
                    for target in targets:
                        try:
                            tfl = target_fetch_lists[target]
                        except KeyError:
                            tfl = []
                            target_fetch_lists[target] = tfl
                        tfl.append(ref)

                for target, tfl in target_fetch_lists.items():
                    print >> sys.stderr, 'Retrying... uploading to %s' % target
                    h2 = httplib2.Http()
                    id = uuid.uuid4()
                    _, _ = h2.request(
                        'http://%s/fetch/%s' % (target, id), 'POST',
                        simplejson.dumps(tfl, cls=SWReferenceJSONEncoder))
                    pending_targets[target] = id

                failed_targets = set()

    # Upload the index object.
    index = simplejson.dumps(output_references, cls=SWReferenceJSONEncoder)
    block_name = '%s:index' % name_prefix

    # Also write the index to a local file, choosing a name that does not
    # already exist.
    suffix = ''
    i = 0
    while os.path.exists(block_name + suffix):
        i += 1
        suffix = '.%d' % i
    filename = block_name + suffix
    with open(filename, 'w') as f:
        simplejson.dump(output_references, f, cls=SWReferenceJSONEncoder)
    print >> sys.stderr, 'Wrote index to %s' % filename

    index_targets = select_targets(workers, options.replication)
    upload_string_to_targets(index, block_name, index_targets)

    #index_ref = SW2_ConcreteReference(block_name, len(index), index_targets)

    #print index_ref
    #print
    for target in index_targets:
        print 'swbs://%s/%s' % (target, block_name)
Example #7
    def _execute(self, block_store, task_id):
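        # Fetch inputs, run the external process, then store and publish its outputs.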
        if self.eager_fetch:
            file_inputs = self.get_filenames_eager(block_store,
                                                   self.input_refs)
            _, transfer_ctx = self.get_filenames(block_store, [])
        else:
            file_inputs, transfer_ctx = self.get_filenames(
                block_store, self.input_refs)
        with self._lock:
            self.transfer_ctx = transfer_ctx
        file_outputs = []
        # Allocate a named temporary file for each expected output.
        for _ in range(len(self.output_refs)):
            with tempfile.NamedTemporaryFile(delete=False) as this_file:
                file_outputs.append(this_file.name)

        if self.stream_output:
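            # Pre-publish each output file and advertise stream references at
            # the master, so consumers can fetch output as it is produced.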
            stream_refs = {}
            for i, filename in enumerate(file_outputs):
                block_store.prepublish_file(filename, self.output_ids[i])
                stream_ref = SW2_StreamReference(self.output_ids[i])
                stream_ref.add_location_hint(block_store.netloc)
                stream_refs[self.output_ids[i]] = stream_ref
            self.master_proxy.publish_refs(task_id, stream_refs)

        self.proc = self.start_process(block_store, file_inputs, file_outputs,
                                       transfer_ctx)
        add_running_child(self.proc)

        rc = self.await_process(block_store, file_inputs, file_outputs,
                                transfer_ctx)
        remove_running_child(self.proc)

        self.proc = None

        cherrypy.engine.publish("worker_event",
                                "Executor: Waiting for transfers (for cache)")
        transfer_ctx.wait_for_all_transfers()
        if "trace_io" in self.debug_opts:
            transfer_ctx.log_traces()

        # We must do this before publishing, so that whole files are in the block store.
        with self._lock:
            transfer_ctx.cleanup(block_store)
            self.transfer_ctx = None

        # If we have fetched any objects to this worker, publish them at the master.
        extra_publishes = {}
        for ref in self.input_refs:
            if isinstance(ref, SW2_ConcreteReference) and \
                    block_store.netloc not in ref.location_hints:
                extra_publishes[ref.id] = SW2_ConcreteReference(
                    ref.id, ref.size_hint, [block_store.netloc])
        for sweetheart in self.make_sweetheart:
            extra_publishes[sweetheart.id] = SW2_SweetheartReference(
                sweetheart.id, sweetheart.size_hint, block_store.netloc,
                [block_store.netloc])
        if len(extra_publishes) > 0:
            self.master_proxy.publish_refs(task_id, extra_publishes)

        failure_bindings = transfer_ctx.get_failed_refs()
        if failure_bindings is not None:
            raise MissingInputException(failure_bindings)

        # A non-zero exit code from the child process is treated as task failure.
        if rc != 0:
            raise OSError()
        cherrypy.engine.publish("worker_event", "Executor: Storing outputs")
        for i, filename in enumerate(file_outputs):
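            # Streamed outputs were pre-published and must be committed;
            # otherwise the file is stored from scratch.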

            if self.stream_output:
                _, size_hint = block_store.commit_file(filename,
                                                       self.output_ids[i],
                                                       can_move=True)
            else:
                _, size_hint = block_store.store_file(filename,
                                                      self.output_ids[i],
                                                      can_move=True)

            # XXX: fix provenance.
            real_ref = SW2_ConcreteReference(self.output_ids[i], size_hint)
            real_ref.add_location_hint(block_store.netloc)
            self.output_refs[i] = real_ref

        cherrypy.engine.publish("worker_event", "Executor: Done")
Example #8
def main():
    parser = OptionParser()
    parser.add_option("-m",
                      "--master",
                      action="store",
                      dest="master",
                      help="Master URI",
                      metavar="MASTER",
                      default=os.getenv("SW_MASTER"))
    parser.add_option("-i",
                      "--id",
                      action="store",
                      dest="id",
                      help="Job ID",
                      metavar="ID",
                      default="default")
    parser.add_option("-e",
                      "--env",
                      action="store_true",
                      dest="send_env",
                      help="Set this flag to send the current environment with the script as _env",
                      default=False)
    (options, args) = parser.parse_args()

    if not options.master:
        parser.print_help()
        print >> sys.stderr, "Must specify master URI with --master"
        sys.exit(1)

    if len(args) != 1:
        parser.print_help()
        print >> sys.stderr, "Must specify one script file to execute, as argument"
        sys.exit(1)

    script_name = args[0]
    master_uri = options.master
    id = options.id

    print id, "STARTED", now_as_timestamp()

    parser = SWScriptParser()

    with open(script_name, 'r') as script_file:
        script = parser.parse(script_file.read())

    print id, "FINISHED_PARSING", now_as_timestamp()

    if script is None:
        print "Script did not parse :("
        sys.exit(1)

    cont = SWContinuation(script, SimpleContext())
    if options.send_env:
        cont.context.bind_identifier('env', os.environ)

    http = httplib2.Http()

    # Upload the pickled continuation to the master's data store.
    master_data_uri = urlparse.urljoin(master_uri, "/data/")
    pickled_cont = pickle.dumps(cont)
    (_, content) = http.request(master_data_uri, "POST", pickled_cont)
    cont_id = simplejson.loads(content)

    out_id = 'joboutput:%s' % cont_id

    print id, "SUBMITTED_CONT", now_as_timestamp()

    #print continuation_uri

    master_netloc = urlparse.urlparse(master_uri).netloc
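    # The root task depends only on the uploaded continuation; its single
    # expected output will hold the job's final result.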
    task_descriptor = {
        'dependencies': {
            '_cont':
            SW2_ConcreteReference(cont_id, len(pickled_cont), [master_netloc])
        },
        'handler': 'swi',
        'expected_outputs': [out_id]
    }

    master_task_submit_uri = urlparse.urljoin(master_uri, "/job/")
    (_, content) = http.request(
        master_task_submit_uri, "POST",
        simplejson.dumps(task_descriptor, cls=SWReferenceJSONEncoder))

    print id, "SUBMITTED_JOB", now_as_timestamp()

    out = simplejson.loads(content)

    notify_url = urlparse.urljoin(master_uri,
                                  "/job/%s/completion" % out['job_id'])
    job_url = urlparse.urljoin(master_uri, "/browse/job/%s" % out['job_id'])

    print id, "JOB_URL", job_url

    #print "Blocking to get final result"
    (_, content) = http.request(notify_url)
    completion_result = simplejson.loads(content,
                                         object_hook=json_decode_object_hook)
    if "error" in completion_result.keys():
        print id, "ERROR", completion_result["error"]
        return None
    else:
        print id, "GOT_RESULT", now_as_timestamp()
        #print content
        return completion_result["result_ref"]