def commit_result(self, block_store, master_proxy):
    commit_bindings = {}
    for ref in self.additional_refs_to_publish:
        commit_bindings[ref.id] = ref

    if self.result is None:
        # No result value to store: just commit the bindings (and, optionally, the saved continuation).
        if self.save_continuation:
            save_cont_uri, _ = self.block_store.store_object(self.continuation, 'pickle', self.get_saved_continuation_object_id())
        else:
            save_cont_uri = None
        master_proxy.commit_task(self.task_id, commit_bindings, save_cont_uri, self.replay_uuid_list)
        return

    serializable_result = map_leaf_values(self.convert_tasklocal_to_real_reference, self.result)
    _, size_hint = block_store.store_object(serializable_result, 'json', self.expected_outputs[0])
    if size_hint < 128:
        # Small results are passed by value rather than by reference.
        result_ref = SWDataValue(self.expected_outputs[0], serializable_result)
    else:
        result_ref = SW2_ConcreteReference(self.expected_outputs[0], size_hint)
        result_ref.add_location_hint(self.block_store.netloc)
    commit_bindings[self.expected_outputs[0]] = result_ref

    if self.save_continuation:
        save_cont_uri, size_hint = self.block_store.store_object(self.continuation, 'pickle', self.get_saved_continuation_object_id())
    else:
        save_cont_uri = None

    master_proxy.commit_task(self.task_id, commit_bindings, save_cont_uri, self.replay_uuid_list)
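# Note: map_leaf_values is imported from elsewhere in the runtime. A minimal sketch of the
# behaviour assumed by commit_result and spawn_exec_func (applying a function to every leaf
# of a nested list/dict structure) might look like the following. The name
# _map_leaf_values_sketch is hypothetical and included for illustration only.
def _map_leaf_values_sketch(f, value):
    if isinstance(value, list):
        return [_map_leaf_values_sketch(f, v) for v in value]
    elif isinstance(value, dict):
        return dict((k, _map_leaf_values_sketch(f, v)) for (k, v) in value.items())
    else:
        return f(value)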
def default(self, id):
    safe_id = id
    if cherrypy.request.method == 'GET':
        is_streaming, filename = self.block_store.maybe_streaming_filename(safe_id)
        if is_streaming:
            cherrypy.response.headers['Pragma'] = 'streaming'
        try:
            response_body = serve_file(filename)
            return response_body
        except cherrypy.HTTPError as he:
            # The streaming file might have been deleted between calls to maybe_streaming_filename
            # and serve_file. Try again, because this time the non-streaming filename should be
            # available.
            if he.status == 404:
                if not is_streaming:
                    raise
                cherrypy.response.headers.pop('Pragma', None)
                is_streaming, filename = self.block_store.maybe_streaming_filename(safe_id)
                try:
                    return serve_file(filename)
                except cherrypy.HTTPError as he:
                    if he.status == 416:
                        raise cherrypy.HTTPError(418)
                    else:
                        raise
            elif he.status == 416:
                raise cherrypy.HTTPError(418)
            else:
                raise
    elif cherrypy.request.method == 'POST':
        if self.backup_sender is not None:
            request_body = cherrypy.request.body.read()
            url = self.block_store.store_raw_file(StringIO.StringIO(request_body), safe_id)
            self.backup_sender.add_data(safe_id, request_body)
        else:
            url = self.block_store.store_raw_file(cherrypy.request.body, safe_id)
        if self.task_pool is not None:
            self.task_pool.publish_refs({safe_id: SW2_ConcreteReference(safe_id, None, [self.block_store.netloc])})
        return simplejson.dumps(url)
    elif cherrypy.request.method == 'HEAD':
        if os.path.exists(self.block_store.filename(id)):
            return
        else:
            raise cherrypy.HTTPError(404)
    else:
        raise cherrypy.HTTPError(405)
def spawn_all(self, block_store, master_proxy):
    current_batch = []

    if len(self.spawn_list) == 0:
        return

    current_index = 0
    while current_index < len(self.spawn_list):
        # must_wait is never set to True in this version, so all non-ignored entries
        # accumulate into a single batch that is fired after the loop.
        must_wait = False

        if self.spawn_list[current_index].ignore:
            current_index += 1
            continue

        current_cont = self.spawn_list[current_index].continuation

        if must_wait:
            if not self.is_running:
                return
            # Fire off the current batch.
            master_proxy.spawn_tasks(self.task_id, current_batch)
            # Iterate again on the same index.
            current_batch = []
            continue
        else:
            # Store the continuation and add it to the task descriptor.
            if current_cont is not None:
                spawned_cont_id = self.get_spawn_continuation_object_id(self.spawn_list[current_index].id)
                _, size_hint = block_store.store_object(current_cont, 'pickle', spawned_cont_id)
                spawned_cont_ref = SW2_ConcreteReference(spawned_cont_id, size_hint)
                spawned_cont_ref.add_location_hint(self.block_store.netloc)
                self.spawn_list[current_index].task_descriptor['dependencies']['_cont'] = spawned_cont_ref
                self.maybe_also_publish(spawned_cont_ref)

            # Current task is now ready to be spawned.
            current_batch.append(self.spawn_list[current_index].task_descriptor)
            current_index += 1

    if len(current_batch) > 0:
        if not self.is_running:
            return
        # Fire off the current batch.
        master_proxy.spawn_tasks(self.task_id, current_batch)
def index(self):
    if cherrypy.request.method == 'POST':
        id = self.block_store.allocate_new_id()
        if self.backup_sender is not None:
            request_body = cherrypy.request.body.read()
            self.block_store.store_raw_file(StringIO.StringIO(request_body), id)
            self.backup_sender.add_data(id, request_body)
        else:
            self.block_store.store_raw_file(cherrypy.request.body, id)
        if self.task_pool is not None:
            self.task_pool.publish_refs({id: SW2_ConcreteReference(id, None, [self.block_store.netloc])})
        return simplejson.dumps(id)
    elif cherrypy.request.method == 'GET':
        return serve_file(self.block_store.generate_block_list_file())
    else:
        raise cherrypy.HTTPError(405)
def spawn_exec_func(self, executor_name, exec_args, num_outputs):
    new_task_id = self.create_spawned_task_name()
    inputs = {}

    args = self.do_eager_thunks(exec_args)
    args_id, expected_output_ids = self.create_names_for_exec(executor_name, args, num_outputs)
    ret = [self.continuation.create_tasklocal_reference(SW2_FutureReference(expected_output_ids[i])) for i in range(num_outputs)]

    def args_check_mapper(leaf):
        # Replace each task-local reference in the args structure with an index into the
        # spawned task's dependency dictionary, recording the real reference as an input.
        if isinstance(leaf, SWLocalReference):
            real_ref = self.continuation.resolve_tasklocal_reference_with_ref(leaf)
            i = len(inputs)
            inputs[i] = real_ref
            ret = SWLocalReference(i)
            return ret
        return leaf

    transformed_args = map_leaf_values(args_check_mapper, args)
    _, size_hint = self.block_store.store_object(transformed_args, 'pickle', args_id)
    args_ref = SW2_ConcreteReference(args_id, size_hint)
    self.spawn_exec_counter += 1
    args_ref.add_location_hint(self.block_store.netloc)
    self.maybe_also_publish(args_ref)
    inputs['_args'] = args_ref

    task_descriptor = {'task_id': new_task_id,
                       'handler': executor_name,
                       'dependencies': inputs,
                       'expected_outputs': expected_output_ids}

    self.spawn_list.append(SpawnListEntry(new_task_id, task_descriptor))
    if len(self.spawn_list) > 20:
        # Flush the pending spawns in a batch once the list grows beyond 20 entries.
        self.spawn_all(self.block_store, self.master_proxy)
        self.spawn_list = []

    return ret
def main():
    parser = OptionParser()
    parser.add_option("-m", "--master", action="store", dest="master", help="Master URI", metavar="MASTER", default=os.getenv("SW_MASTER"))
    parser.add_option("-s", "--size", action="store", dest="size", help="Block size in bytes", metavar="N", type="int", default=None)
    parser.add_option("-n", "--num-blocks", action="store", dest="count", help="Number of blocks", metavar="N", type="int", default=1)
    parser.add_option("-r", "--replication", action="store", dest="replication", help="Copies of each block", type="int", metavar="N", default=1)
    parser.add_option("-d", "--delimiter", action="store", dest="delimiter", help="Block delimiter character", metavar="CHAR", default=None)
    parser.add_option("-l", "--lines", action="store_const", dest="delimiter", const="\n", help="Use newline as block delimiter")
    parser.add_option("-p", "--packet-size", action="store", dest="packet_size", help="Upload packet size in bytes", metavar="N", type="int", default=1048576)
    parser.add_option("-i", "--id", action="store", dest="name", help="Block name prefix", metavar="NAME", default=None)
    parser.add_option("-u", "--urls", action="store_true", dest="urls", help="Treat files as containing lists of URLs", default=False)
    (options, args) = parser.parse_args()

    workers = get_worker_netlocs(options.master)
    name_prefix = create_name_prefix(options.name)

    output_references = []

    # Upload the data in extents.
    if not options.urls:
        if len(args) == 1:
            # A single input file is split into extents according to the size/delimiter options.
            input_filename = args[0]
            extent_list = build_extent_list(input_filename, options)
            with open(input_filename, 'rb') as input_file:
                for i, (start, finish) in enumerate(extent_list):
                    targets = select_targets(workers, options.replication)
                    block_name = make_block_id(name_prefix, i)
                    print >> sys.stderr, 'Uploading %s to (%s)' % (block_name, ",".join(targets))
                    upload_extent_to_targets(input_file, block_name, start, finish, targets, options.packet_size)
                    conc_ref = SW2_ConcreteReference(block_name, finish - start, targets)
                    output_references.append(conc_ref)
        else:
            # Multiple input files are each uploaded as a single block.
            for i, input_filename in enumerate(args):
                with open(input_filename, 'rb') as input_file:
                    targets = select_targets(workers, options.replication)
                    block_name = make_block_id(name_prefix, i)
                    block_size = os.path.getsize(input_filename)
                    print >> sys.stderr, 'Uploading %s to (%s)' % (input_filename, ",".join(targets))
                    upload_extent_to_targets(input_file, block_name, 0, block_size, targets, options.packet_size)
                    conc_ref = SW2_ConcreteReference(block_name, block_size, targets)
                    output_references.append(conc_ref)
    else:
        # The input files contain lists of URLs, which the chosen workers will fetch themselves.
        urls = []
        for filename in args:
            with open(filename, 'r') as f:
                for line in f:
                    urls.append(line.strip())

        target_fetch_lists = {}
        for i, url in enumerate(urls):
            targets = select_targets(workers, options.replication)
            block_name = make_block_id(name_prefix, i)
            ref = SW2_FetchReference(block_name, url, i)
            for target in targets:
                try:
                    tfl = target_fetch_lists[target]
                except KeyError:
                    tfl = []
                    target_fetch_lists[target] = tfl
                tfl.append(ref)
            h = httplib2.Http()
            print >> sys.stderr, 'Getting size of %s' % url
            response, _ = h.request(url, 'HEAD')
            try:
                size = int(response['content-length'])
            except KeyError:
                size = 1048576
            conc_ref = SW2_ConcreteReference(block_name, size, targets)
            output_references.append(conc_ref)

        pending_targets = {}
        failed_targets = set()
        for target, tfl in target_fetch_lists.items():
            h2 = httplib2.Http()
            print >> sys.stderr, 'Uploading to %s' % target
            id = uuid.uuid4()
            response, _ = h2.request('http://%s/fetch/%s' % (target, id), 'POST', simplejson.dumps(tfl, cls=SWReferenceJSONEncoder))
            if response.status != 202:
                print >> sys.stderr, 'Failed... %s' % target
                failed_targets.add(target)
            else:
                pending_targets[target] = id

        while True:
            # Wait until we get a non-try-again response from all of the targets.
            while len(pending_targets) > 0:
                time.sleep(3)
                for target, id in list(pending_targets.items()):
                    try:
                        response, _ = h2.request('http://%s/fetch/%s' % (target, id), 'GET')
                        if response.status == 408:
                            print >> sys.stderr, 'Continuing to wait for %s' % target
                            continue
                        elif response.status == 200:
                            print >> sys.stderr, 'Succeeded! %s' % target
                            del pending_targets[target]
                        else:
                            print >> sys.stderr, 'Failed... %s' % target
                            del pending_targets[target]
                            failed_targets.add(target)
                    except:
                        print >> sys.stderr, 'Failed... %s' % target
                        del pending_targets[target]
                        failed_targets.add(target)

            if len(pending_targets) == 0 and len(failed_targets) == 0:
                break

            # All transfers have finished or failed, so check for failures.
            if len(failed_targets) > 0:
                # Redistribute blocks to working workers.
                redistribute_refs = {}
                for target in failed_targets:
                    tfl = target_fetch_lists[target]
                    for ref in tfl:
                        redistribute_refs[ref.id] = ref
                        conc_ref = SW2_ConcreteReference(block_name, size, targets)
                        output_references[ref.index] = conc_ref

                target_fetch_lists = {}
                # We refetch the worker list, in case any have failed in the mean time.
                workers = get_worker_netlocs(options.master)
                for ref in redistribute_refs.values():
                    targets = select_targets(workers, options.replication)
                    for target in targets:
                        try:
                            tfl = target_fetch_lists[target]
                        except KeyError:
                            tfl = []
                            target_fetch_lists[target] = tfl
                        tfl.append(ref)

                for target, tfl in target_fetch_lists.items():
                    print >> sys.stderr, 'Retrying... uploading to %s' % target
                    h2 = httplib2.Http()
                    id = uuid.uuid4()
                    _, _ = h2.request('http://%s/fetch/%s' % (target, id), 'POST', simplejson.dumps(tfl, cls=SWReferenceJSONEncoder))
                    pending_targets[target] = id

                failed_targets = set()

    # Upload the index object.
    index = simplejson.dumps(output_references, cls=SWReferenceJSONEncoder)
    block_name = '%s:index' % name_prefix

    # Also write a local copy of the index, choosing a filename that does not already exist.
    suffix = ''
    i = 0
    while os.path.exists(block_name + suffix):
        i += 1
        suffix = '.%d' % i
    filename = block_name + suffix
    with open(filename, 'w') as f:
        simplejson.dump(output_references, f, cls=SWReferenceJSONEncoder)
    print >> sys.stderr, 'Wrote index to %s' % filename

    index_targets = select_targets(workers, options.replication)
    upload_string_to_targets(index, block_name, index_targets)

    #index_ref = SW2_ConcreteReference(block_name, len(index), index_targets)
    #print index_ref
    #print

    for target in index_targets:
        print 'swbs://%s/%s' % (target, block_name)
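# Note: select_targets and get_worker_netlocs are defined elsewhere in the tool. A minimal
# sketch of the behaviour assumed above (choosing `replication` distinct worker netlocs for
# each block) could look like this; _select_targets_sketch is a hypothetical name used for
# illustration only, not the original implementation.
def _select_targets_sketch(worker_netlocs, replication):
    import random
    # Pick up to `replication` distinct workers at random.
    return random.sample(list(worker_netlocs), min(replication, len(worker_netlocs)))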
def _execute(self, block_store, task_id):
    if self.eager_fetch:
        file_inputs = self.get_filenames_eager(block_store, self.input_refs)
        _, transfer_ctx = self.get_filenames(block_store, [])
    else:
        file_inputs, transfer_ctx = self.get_filenames(block_store, self.input_refs)
    with self._lock:
        self.transfer_ctx = transfer_ctx

    file_outputs = []
    for i in range(len(self.output_refs)):
        with tempfile.NamedTemporaryFile(delete=False) as this_file:
            file_outputs.append(this_file.name)

    if self.stream_output:
        stream_refs = {}
        for i, filename in enumerate(file_outputs):
            block_store.prepublish_file(filename, self.output_ids[i])
            stream_ref = SW2_StreamReference(self.output_ids[i])
            stream_ref.add_location_hint(block_store.netloc)
            stream_refs[self.output_ids[i]] = stream_ref
        self.master_proxy.publish_refs(task_id, stream_refs)

    self.proc = self.start_process(block_store, file_inputs, file_outputs, transfer_ctx)
    add_running_child(self.proc)
    rc = self.await_process(block_store, file_inputs, file_outputs, transfer_ctx)
    remove_running_child(self.proc)
    self.proc = None

    cherrypy.engine.publish("worker_event", "Executor: Waiting for transfers (for cache)")
    transfer_ctx.wait_for_all_transfers()
    if "trace_io" in self.debug_opts:
        transfer_ctx.log_traces()

    # We must do this before publishing, so that whole files are in the block store.
    with self._lock:
        transfer_ctx.cleanup(block_store)
        self.transfer_ctx = None

    # If we have fetched any objects to this worker, publish them at the master.
    extra_publishes = {}
    for ref in self.input_refs:
        if isinstance(ref, SW2_ConcreteReference) and not block_store.netloc in ref.location_hints:
            extra_publishes[ref.id] = SW2_ConcreteReference(ref.id, ref.size_hint, [block_store.netloc])
    for sweetheart in self.make_sweetheart:
        extra_publishes[sweetheart.id] = SW2_SweetheartReference(sweetheart.id, sweetheart.size_hint, block_store.netloc, [block_store.netloc])
    if len(extra_publishes) > 0:
        self.master_proxy.publish_refs(task_id, extra_publishes)

    failure_bindings = transfer_ctx.get_failed_refs()
    if failure_bindings is not None:
        raise MissingInputException(failure_bindings)

    if rc != 0:
        raise OSError()

    cherrypy.engine.publish("worker_event", "Executor: Storing outputs")
    for i, filename in enumerate(file_outputs):
        if self.stream_output:
            _, size_hint = block_store.commit_file(filename, self.output_ids[i], can_move=True)
        else:
            _, size_hint = block_store.store_file(filename, self.output_ids[i], can_move=True)
        # XXX: fix provenance.
        real_ref = SW2_ConcreteReference(self.output_ids[i], size_hint)
        real_ref.add_location_hint(block_store.netloc)
        self.output_refs[i] = real_ref

    cherrypy.engine.publish("worker_event", "Executor: Done")
def main():
    parser = OptionParser()
    parser.add_option("-m", "--master", action="store", dest="master", help="Master URI", metavar="MASTER", default=os.getenv("SW_MASTER"))
    parser.add_option("-i", "--id", action="store", dest="id", help="Job ID", metavar="ID", default="default")
    parser.add_option("-e", "--env", action="store_true", dest="send_env", help="Set this flag to send the current environment with the script as _env", default=False)
    (options, args) = parser.parse_args()

    if not options.master:
        parser.print_help()
        print >> sys.stderr, "Must specify master URI with --master"
        sys.exit(1)

    if len(args) != 1:
        parser.print_help()
        print >> sys.stderr, "Must specify one script file to execute, as argument"
        sys.exit(1)

    script_name = args[0]
    master_uri = options.master
    id = options.id

    print id, "STARTED", now_as_timestamp()

    parser = SWScriptParser()
    script = parser.parse(open(script_name, 'r').read())

    print id, "FINISHED_PARSING", now_as_timestamp()

    if script is None:
        print "Script did not parse :("
        exit()

    cont = SWContinuation(script, SimpleContext())
    if options.send_env:
        cont.context.bind_identifier('env', os.environ)

    http = httplib2.Http()

    master_data_uri = urlparse.urljoin(master_uri, "/data/")
    pickled_cont = pickle.dumps(cont)
    (_, content) = http.request(master_data_uri, "POST", pickled_cont)
    cont_id = simplejson.loads(content)

    out_id = 'joboutput:%s' % cont_id

    print id, "SUBMITTED_CONT", now_as_timestamp()
    #print continuation_uri

    master_netloc = urlparse.urlparse(master_uri).netloc
    task_descriptor = {'dependencies': {'_cont': SW2_ConcreteReference(cont_id, len(pickled_cont), [master_netloc])},
                       'handler': 'swi',
                       'expected_outputs': [out_id]}
    master_task_submit_uri = urlparse.urljoin(master_uri, "/job/")
    (_, content) = http.request(master_task_submit_uri, "POST", simplejson.dumps(task_descriptor, cls=SWReferenceJSONEncoder))

    print id, "SUBMITTED_JOB", now_as_timestamp()

    out = simplejson.loads(content)

    notify_url = urlparse.urljoin(master_uri, "/job/%s/completion" % out['job_id'])
    job_url = urlparse.urljoin(master_uri, "/browse/job/%s" % out['job_id'])

    print id, "JOB_URL", job_url

    #print "Blocking to get final result"
    (_, content) = http.request(notify_url)
    completion_result = simplejson.loads(content, object_hook=json_decode_object_hook)
    if "error" in completion_result.keys():
        print id, "ERROR", completion_result["error"]
        return None
    else:
        print id, "GOT_RESULT", now_as_timestamp()
        #print content
        return completion_result["result_ref"]
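# Example invocation (hypothetical script file name and master URI, for illustration only):
#   python start_job.py --master http://master:8000 --id myjob --env script.sw
# The script prints timestamped progress markers (STARTED, FINISHED_PARSING, SUBMITTED_CONT,
# SUBMITTED_JOB, JOB_URL, then GOT_RESULT or ERROR) and returns the job's result reference.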