def make_central_pipeline(self, inputs, local_head_gate):
    """
    :param inputs:
    :param local_head_gate:
    :return: (id_and_count, record_id, intermediate_name, superchunk_num_records, superchunk_matrix) + rest_of_input
    """
    inputs = sanitize_generator(inputs)

    # queue incoming chunks into the parallel decompression stage
    queue_name = "sort_ready_to_decomp"
    ready_to_decomp = pipeline.join(upstream_tensors=inputs,
                                    parallel=self.decompress_parallel,
                                    capacity=self.pre_decomp_capacity,
                                    multi=True,
                                    name=queue_name, shared_name=queue_name)
    with tf.name_scope("decompression_stage"):
        ready_to_sort_items = sanitize_generator(
            self.make_decomp_stage(ready_to_decomp=ready_to_decomp))
    assert len(ready_to_sort_items) > 0

    # gate between decompression and sort; credits are supplied by the local head gate
    queue_name = "pre_sort_gate"
    example_item = ready_to_sort_items[0]
    pre_sort_gate = gate.StreamingGate(name=queue_name, shared_name=queue_name,
                                       id_and_count_upstream=example_item[0],
                                       sample_tensors=example_item[1:],
                                       capacity=self.pre_sort_gate_capacity,
                                       limit_upstream=True,
                                       limit_downstream=False)
    gate.add_credit_supplier_from_gates(upstream_gate=local_head_gate,
                                        downstream_gate=pre_sort_gate)

    enqueue_ops = tuple(pre_sort_gate.enqueue(id_and_count=a[0], components=a[1:])
                        for a in ready_to_sort_items)
    gate.add_gate_runner(gate_runner=gate.GateRunner(gate=pre_sort_gate,
                                                     enqueue_ops=enqueue_ops))

    # fan out batched dequeues to the parallel sort stages
    to_sort_ops = tuple(pre_sort_gate.dequeue_many(count=self.sort_batch)
                        for _ in range(self.sort_parallel))
    with tf.name_scope("sort_stage"):
        sorted_items = tuple(self.make_sort_stage(ready_to_sort=to_sort_ops))
    sorted_chunks, control_deps = zip(*sorted_items)

    # queue sorted superchunks for the parallel writers
    queue_name = "sort_ready_to_write"
    ready_to_write = pipeline.join(upstream_tensors=sorted_chunks,
                                   control_dependencies=control_deps,
                                   parallel=self.write_parallel,
                                   multi=True,
                                   capacity=self.pre_write_capacity,
                                   name=queue_name, shared_name=queue_name)
    return ready_to_write
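# `sanitize_generator` is used throughout this module but is not defined in this excerpt.
# A minimal sketch of the assumed behavior (hypothetical; the real helper may differ):
# materialize a generator into a tuple so the result can be indexed and iterated more
# than once, while passing already-materialized sequences through unchanged.
def _sanitize_generator_sketch(maybe_gen):
    if isinstance(maybe_gen, (list, tuple)):
        return maybe_gen
    return tuple(maybe_gen)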
def make_central_pipeline(self, read_columns, head_gate):
    """
    :param read_columns: a generator of (id_and_count, ([ list, of, file, mmap, handles, ... ], {pass around}))
    :param head_gate:
    :return: a generator of (id_and_count, (chunk_matrix, record_id, {pass around}))
    """
    read_columns = sanitize_generator(read_columns)
    if self.order_by == location_value:
        # a gen of (id_and_count, components), where components = ([ handles, columns ])
        read_columns = sanitize_generator(
            self.make_index_building_stage(read_columns=read_columns))

    # barrier gate in front of the merge stage; credits are supplied by the head gate
    queue_name = "pre_merge_barrier_gate"
    example_idc, example_comp = read_columns[0]
    pre_merge_gate = gate.StreamingGate(name=queue_name, shared_name=queue_name,
                                        id_and_count_upstream=example_idc,
                                        sample_tensors=example_comp,
                                        capacity=self.pre_merge_gate_capacity,
                                        limit_upstream=True,
                                        limit_downstream=False)
    gate.add_credit_supplier_from_gates(upstream_gate=head_gate,
                                        downstream_gate=pre_merge_gate)

    enqueue_ops = tuple(pre_merge_gate.enqueue(id_and_count=idc, components=comp)
                        for idc, comp in read_columns)
    gate.add_gate_runner(gate_runner=gate.GateRunner(gate=pre_merge_gate,
                                                     enqueue_ops=enqueue_ops))

    # each parallel merger dequeues an entire dataset at a time
    to_merge = (pre_merge_gate.dequeue_whole_dataset()
                for _ in range(self.merge_parallel))
    with tf.name_scope("merge_merge_stage"):
        to_compress = tuple(self.make_merge_stage(merge_batches=to_merge))
    with tf.name_scope("merge_compress_stage"):
        to_write_items = tuple(
            self.make_compress_stage(to_compress=to_compress))  # returns a generator
    control_deps = tuple(a[1] for a in to_write_items)
    to_write_items = tuple(a[0] for a in to_write_items)

    # queue compressed chunks for the parallel writers
    queue_name = "merge_pre_write_queue"
    to_write = pipeline.join(upstream_tensors=to_write_items,
                             control_dependencies=control_deps,
                             parallel=self.write_parallel,
                             capacity=self.pre_write_capacity,
                             multi=True,
                             name=queue_name, shared_name=queue_name)
    return to_write
def make_head_gate(self, upstream_gate):
    id_and_count, components = upstream_gate.dequeue_partition(count=self.global_batch)
    gate_name = "_".join((self.local_dest, "head_gate"))
    # both limits are off: only one head gate is needed, and no credit control is necessary
    head_gate = gate.StreamingGate(limit_upstream=False, limit_downstream=False,
                                   id_and_count_upstream=id_and_count,
                                   sample_tensors=components,
                                   sample_tensors_are_batch=True,
                                   capacity=2,
                                   name=gate_name, shared_name=gate_name)
    enq_ops = (head_gate.enqueue_many(id_and_count=id_and_count, components=components),)
    gate.add_gate_runner(gate_runner=gate.GateRunner(gate=head_gate, enqueue_ops=enq_ops))
    return head_gate
def make_local_gate(upstream_gate):
    idc, comp = upstream_gate.dequeue_partition(count=1)
    head_gate = gate.StreamingGate(limit_upstream=False, limit_downstream=False,
                                   id_and_count_upstream=idc,
                                   sample_tensors=comp,
                                   sample_tensors_are_batch=True)
    enq_ops = (head_gate.enqueue_many(id_and_count=idc, components=comp),)
    gate.add_gate_runner(gate_runner=gate.GateRunner(gate=head_gate, enqueue_ops=enq_ops))
    return head_gate
def make_head_gate(self, upstream_gate):
    id_and_count, components = upstream_gate.dequeue_whole_partition()
    gate_name = "merge_head_gate"
    head_gate = gate.StreamingGate(
        limit_upstream=False,   # don't limit upstream; that comes from the central queue
        limit_downstream=True,  # do limit the downstream batching join gate so it doesn't consume all resources
        id_and_count_upstream=id_and_count,
        sample_tensors=components,
        name=gate_name, shared_name=gate_name,
        capacity=self.head_gate_capacity,
        sample_tensors_are_batch=True)
    enq_ops = (head_gate.enqueue_many(id_and_count=id_and_count, components=components),)
    gate.add_gate_runner(gate_runner=gate.GateRunner(gate=head_gate, enqueue_ops=enq_ops))
    return head_gate
def _construct_graph(self, args, device_map, num_client_slots):
    gate_name = "ingress_gate"
    num_merge = args.merge_stages
    num_combo = args.combo_stages
    num_align = args.align_stages
    if (num_merge + num_combo) < 1:
        raise Exception("Need >0 merge stages. Got {m} pure merge and {c} combo".format(
            m=num_merge, c=num_combo))
    if (num_align + num_combo) < 1:
        raise Exception("Need >0 align stages. Got {a} pure align stages and {c} combo".format(
            a=num_align, c=num_combo))

    if args.parallel_open_requests is not None:
        capacity_between_gates = args.parallel_open_requests
    else:
        capacity_between_gates = int(num_client_slots * args.parallel_open_request_expansion_factor)
    if capacity_between_gates < 1:
        raise Exception("Capacity between gates is <1 ({c})".format(c=capacity_between_gates))
    args.parallel_open_requests = capacity_between_gates
    self.log.info("Capacity between gates: {}".format(capacity_between_gates))

    with tf.name_scope(gate_name):
        ingress = gate.IngressGate(dtypes=self.ingress_dtypes,
                                   shapes=self.ingress_shapes,
                                   capacity=capacity_between_gates,
                                   shared_name=gate_name, name=gate_name)

    with tf.name_scope("align_sort_stage"):
        align_stages = tuple(fused_align_sort.LocalFusedStage(args=args) for _ in range(num_align))
        small_align_stages = tuple(fused_align_sort.SmallLocalFusedStage(args=args) for _ in range(num_combo))

        def make_align_stages(stages, align_devices):
            for stage, device in zip(stages, align_devices):
                with device():
                    device_graph = stage.make_graph(upstream_gate=ingress)
                    try:  # convert to a tuple if it returns a generator
                        device_graph[0]
                    except TypeError:
                        device_graph = tuple(device_graph)
                    assert len(stage.run_first) > 0
                    for item in stage.run_first:
                        self._add_run_first(tensor=item)
                    yield device_graph

        outputs = tuple(itertools.chain.from_iterable(
            make_align_stages(stages=s, align_devices=devices)
            for s, devices in (
                (align_stages, device_map.get(align_sort_key, None)),
                (small_align_stages, device_map.get(combo_key, None))
            ) if devices is not None
        ))
        assert len(outputs) == num_align + num_combo, \
            "Expected {e} align stages ({a} pure align and {c} combo) but only got {actual}".format(
                e=num_align + num_combo, a=num_align, c=num_combo, actual=len(outputs))
        outputs = tuple(itertools.chain.from_iterable(outputs))  # flatten

    example_output = outputs[0]
    if args.credit_link == credit_link_end_to_end:
        merge_gate_kwargs = {
            "limit_upstream": False,
            "limit_downstream": False
        }
    else:
        merge_gate_kwargs = {"capacity": capacity_between_gates}

    with tf.name_scope("inter_stage_gate"):
        gate_name = "ready_to_merge"
        merge_gate = gate.StreamingGate(
            sample_tensors=example_output[1:-1],  # drop the leading id_and_count and the trailing record count
            id_and_count_upstream=example_output[0],
            join=True, name=gate_name, shared_name=gate_name,
            **merge_gate_kwargs)
        enqueue_ops = tuple(merge_gate.enqueue(id_and_count=a[0], components=a[1:-1])
                            for a in outputs)
        if args.align_counters:
            if getattr(args, "summary", False):
                with tf.name_scope(None):  # clears this out of the inter_stage_gate scope
                    with tf.name_scope(performance_name_scope):
                        enqueue_ops = tuple(make_counter(
                            counter_name="sorted_counter",
                            summary_name="sorted_num_records",
                            deps_and_counters=zip(enqueue_ops,
                                                  (a[-1] for a in outputs))))
            else:
                self.log.warning("Align counters requested, but no summary was requested. "
                                 "Please enable summary for this to work.")
    gate.add_gate_runner(gate_runner=gate.GateRunner(gate=merge_gate,
                                                     enqueue_ops=enqueue_ops,
                                                     device=merge_gate.device))
    if args.credit_link == credit_link_successive:
        gate.add_credit_supplier_from_gates(upstream_gate=ingress, downstream_gate=merge_gate)

    with tf.name_scope("merge_stage"):
        merge_stages = tuple(merge_stage.LocalMergeStage(args=args) for _ in range(num_merge))
        small_merge_stages = tuple(merge_stage.SmallLocalMergeStage(args=args) for _ in range(num_combo))

        def make_merge_stages(stages, merge_devices):
            for stage, device in zip(stages, merge_devices):
                with device():
                    device_graph = stage.make_graph(upstream_gate=merge_gate)
                    try:  # convert to a tuple if it returns a generator
                        device_graph[0]
                    except TypeError:
                        device_graph = tuple(device_graph)
                    yield device_graph

        merge_stage_outputs = tuple(itertools.chain.from_iterable(
            make_merge_stages(stages=s, merge_devices=devices)
            for s, devices in (
                (merge_stages, device_map.get(merge_key, None)),
                (small_merge_stages, device_map.get(combo_key, None))
            ) if devices is not None
        ))
        assert len(merge_stage_outputs) == num_merge + num_combo, \
            "Expected {e} merge stages ({p} pure merge and {c} combo) but only got {actual}".format(
                p=num_merge, c=num_combo, e=num_merge + num_combo, actual=len(merge_stage_outputs))
        merge_stage_outputs = tuple(itertools.chain.from_iterable(merge_stage_outputs))  # flatten

    example_output = merge_stage_outputs[0]
    gate_name = "egress_gate"
    with tf.name_scope(gate_name):
        egress = gate.EgressGate(capacity=capacity_between_gates,
                                 sample_tensors=example_output[1:],
                                 id_and_count_upstream=example_output[0],
                                 join=True, name=gate_name, shared_name=gate_name)
        enqueue_ops = tuple(egress.enqueue(id_and_count=a[0], components=a[1:])
                            for a in merge_stage_outputs)
        if args.merge_counters:
            if getattr(args, "summary", False):
                with tf.name_scope(None):
                    with tf.name_scope(performance_name_scope):
                        enqueue_ops = tuple(make_counter(
                            counter_name="merged_counter",
                            summary_name="merged_num_records",
                            deps_and_counters=zip(enqueue_ops,
                                                  (a[3] for a in merge_stage_outputs))))
            else:
                self.log.warning("Merge counters requested, but no summary was requested. "
                                 "Please enable summary for this to work.")
    gate.add_gate_runner(gate_runner=gate.GateRunner(gate=egress,
                                                     enqueue_ops=enqueue_ops,
                                                     device=egress.device))
    if args.credit_link == credit_link_end_to_end:
        gate.add_credit_supplier_from_gates(upstream_gate=ingress, downstream_gate=egress)
    else:
        gate.add_credit_supplier_from_gates(upstream_gate=merge_gate, downstream_gate=egress)

    self.close_op = (ingress.close(), egress.close())

    with tf.name_scope("client_slots"):
        unknown_shape = tf.TensorShape([None])
        batch_ingress_shapes = tuple(unknown_shape.concatenate(ishape)
                                     for ishape in self.ingress_shapes)
        for idx in range(num_client_slots):
            ingress_placeholders = tuple(
                tf.placeholder(dtype=dtype, shape=shape, name="client_slot_{}".format(idx))
                for dtype, shape in zip(self.ingress_dtypes, batch_ingress_shapes))
            ingress_enqueue = ingress.enqueue_request(components=ingress_placeholders,
                                                      name="ingress_enqueue_{}".format(idx))
            egress_dequeue = egress.dequeue_request(request_id=ingress_enqueue,
                                                    name="egress_dequeue_{}".format(idx))
            yield self.ClientSlot(ingress_placeholders=ingress_placeholders,
                                  egress_dequeue=egress_dequeue)
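# `make_counter` and `performance_name_scope` are external to this excerpt. Judging from the
# call sites above, make_counter wraps each enqueue op so a running record count is maintained
# and exported as a summary. A hypothetical TF1-style sketch, not the project's actual
# implementation (name, signature, and behavior are assumptions):
def _make_counter_sketch(counter_name, summary_name, deps_and_counters):
    total = tf.get_variable(name=counter_name, shape=(), dtype=tf.int64,
                            initializer=tf.zeros_initializer(), trainable=False)
    for dep, num_records in deps_and_counters:
        with tf.control_dependencies([dep]):  # only count after the enqueue has run
            updated = tf.assign_add(total, tf.cast(num_records, tf.int64))
        tf.summary.scalar(summary_name, updated)
        yield updated  # the update op stands in for the original enqueue op downstream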
def _construct_graph(self, args, device_map, num_client_slots):
    gate_name = "ingress_gate"
    num_align = args.align_stages
    num_sort = args.sort_stages
    num_merge = args.merge_stages
    align_devices = device_map[align_key]
    sort_devices = device_map[sort_key]
    merge_devices = device_map[merge_key]

    if args.parallel_open_requests is not None:
        capacity_between_gates = args.parallel_open_requests
    else:
        capacity_between_gates = int(num_client_slots * args.parallel_open_request_expansion_factor)
    if capacity_between_gates < 1:
        raise Exception("Capacity between gates is <1 ({c})".format(c=capacity_between_gates))
    args.parallel_open_requests = capacity_between_gates
    self.log.info("Capacity between gates: {}".format(capacity_between_gates))

    if args.credit_link == credit_link_end_to_end:
        inter_gate_kwargs = {
            "limit_upstream": False,
            "limit_downstream": False
        }
    else:
        inter_gate_kwargs = {"capacity": capacity_between_gates}

    def generate_local_pipelines(name, num_pipelines, devices, upstream_gate, pipeline_type):
        with tf.name_scope("{}_pipeline".format(name)):
            pipelines = tuple(pipeline_type(args=args) for _ in range(num_pipelines))
            assert len(pipelines) == len(devices)
            for pipeline, device in zip(pipelines, devices):
                with device():
                    device_graph = pipeline.make_graph(upstream_gate=upstream_gate)
                    try:  # convert to a tuple if it returns a generator
                        device_graph[0]
                    except TypeError:
                        device_graph = tuple(device_graph)
                    run_first = pipeline.run_first
                    for item in run_first:
                        self._add_run_first(item)
                    yield device_graph

    with tf.name_scope(gate_name):
        ingress = gate.IngressGate(dtypes=self.ingress_dtypes,
                                   shapes=self.ingress_shapes,
                                   capacity=capacity_between_gates,
                                   shared_name=gate_name, name=gate_name)

    align_outputs = tuple(generate_local_pipelines(name="align",
                                                   devices=align_devices,
                                                   upstream_gate=ingress,
                                                   pipeline_type=snap_align.CephSnapStage,
                                                   num_pipelines=num_align))
    assert len(align_outputs) == num_align

    with tf.name_scope("sort_gate"):
        gate_name = "global_sort_gate"
        example_output = align_outputs[0]
        sort_gate = gate.StreamingGate(
            sample_tensors=example_output[4:6],  # indices 4 and 5 are key and namespace
            id_and_count_upstream=example_output[0],
            join=True, name=gate_name, shared_name=gate_name,
            **inter_gate_kwargs)
        enqueue_ops = tuple(sort_gate.enqueue(id_and_count=a[0], components=a[4:6])
                            for a in align_outputs)
        if args.align_counters:
            if getattr(args, "summary", False):
                with tf.name_scope(None):
                    with tf.name_scope(performance_name_scope):
                        enqueue_ops = tuple(make_counter(
                            counter_name="aligned_counter",
                            summary_name="aligned_num_records",
                            deps_and_counters=zip(enqueue_ops,
                                                  (a[3] for a in align_outputs))))
            else:
                self.log.warning("Align counters requested, but no summary was requested. "
                                 "Please enable summary for this to work.")
    gate.add_gate_runner(gate_runner=gate.GateRunner(gate=sort_gate,
                                                     enqueue_ops=enqueue_ops,
                                                     device=sort_gate.device))
    if args.credit_link == credit_link_successive:
        gate.add_credit_supplier_from_gates(upstream_gate=ingress, downstream_gate=sort_gate)

    sort_outputs = tuple(generate_local_pipelines(name="sort",
                                                  devices=sort_devices,
                                                  upstream_gate=sort_gate,
                                                  pipeline_type=sort_stage.CephSort,
                                                  num_pipelines=num_sort))
    assert len(sort_outputs) == num_sort
    sort_outputs = tuple(itertools.chain.from_iterable(sort_outputs))

    with tf.name_scope("merge_gate"):
        gate_name = "global_merge_gate"
        example_output = sort_outputs[0]
        merge_gate = gate.StreamingGate(
            sample_tensors=example_output[1:-1],  # drop the leading id_and_count and the trailing component
            id_and_count_upstream=example_output[0],
            join=True, name=gate_name, shared_name=gate_name,
            **inter_gate_kwargs)
        enqueue_ops = tuple(merge_gate.enqueue(id_and_count=a[0], components=a[1:-1])
                            for a in sort_outputs)
        if args.align_counters:
            if getattr(args, "summary", False):
                with tf.name_scope(None):
                    with tf.name_scope(performance_name_scope):
                        enqueue_ops = tuple(make_counter(
                            counter_name="sorted_counter",
                            summary_name="sorted_num_records",
                            deps_and_counters=zip(enqueue_ops,
                                                  (a[3] for a in sort_outputs))))
            else:
                self.log.warning("Sort counters requested, but no summary was requested. "
                                 "Please enable summary for this to work.")
    gate.add_gate_runner(gate_runner=gate.GateRunner(gate=merge_gate,
                                                     enqueue_ops=enqueue_ops,
                                                     device=merge_gate.device))
    if args.credit_link == credit_link_successive:
        gate.add_credit_supplier_from_gates(upstream_gate=sort_gate, downstream_gate=merge_gate)

    merge_outputs = tuple(generate_local_pipelines(name="merge",
                                                   devices=merge_devices,
                                                   upstream_gate=merge_gate,
                                                   pipeline_type=merge_stage.CephMergeStage,
                                                   num_pipelines=num_merge))
    assert len(merge_outputs) == num_merge
    merge_outputs = tuple(itertools.chain.from_iterable(merge_outputs))

    example_output = merge_outputs[0]
    gate_name = "egress_gate"
    with tf.name_scope(gate_name):
        egress = gate.EgressGate(capacity=capacity_between_gates,
                                 sample_tensors=example_output.components,
                                 id_and_count_upstream=example_output.id_and_count,
                                 join=True, name=gate_name, shared_name=gate_name)
        enqueue_ops = tuple(egress.enqueue(id_and_count=a.id_and_count, components=a.components)
                            for a in merge_outputs)
        if args.merge_counters:
            if getattr(args, "summary", False):
                with tf.name_scope(None):
                    with tf.name_scope(performance_name_scope):
                        enqueue_ops = tuple(make_counter(
                            counter_name="merged_counter",
                            summary_name="merged_num_records",
                            deps_and_counters=zip(enqueue_ops,
                                                  (a.components[2] for a in merge_outputs))))
            else:
                self.log.warning("Merge counters requested, but no summary was requested. "
                                 "Please enable summary for this to work.")
    gate.add_gate_runner(gate_runner=gate.GateRunner(gate=egress,
                                                     enqueue_ops=enqueue_ops,
                                                     device=egress.device))
    if args.credit_link == credit_link_end_to_end:
        gate.add_credit_supplier_from_gates(upstream_gate=ingress, downstream_gate=egress)
    else:
        gate.add_credit_supplier_from_gates(upstream_gate=merge_gate, downstream_gate=egress)

    # the first two assignments are immediately overwritten, so the close ops are effectively disabled here
    self.close_op = (ingress.close(), sort_gate.close(), merge_gate.close(), egress.close())
    self.close_op = self.close_op[:1]
    self.close_op = []

    with tf.name_scope("client_slots"):
        unknown_shape = tf.TensorShape([None])
        batch_ingress_shapes = tuple(unknown_shape.concatenate(ishape)
                                     for ishape in self.ingress_shapes)
        for idx in range(num_client_slots):
            ingress_placeholders = tuple(
                tf.placeholder(dtype=dtype, shape=shape, name="client_slot_{}".format(idx))
                for dtype, shape in zip(self.ingress_dtypes, batch_ingress_shapes))
            ingress_enqueue = ingress.enqueue_request(components=ingress_placeholders,
                                                      name="ingress_enqueue_{}".format(idx))
            egress_dequeue = egress.dequeue_request(request_id=ingress_enqueue,
                                                    name="egress_dequeue_{}".format(idx))
            yield self.ClientSlot(ingress_placeholders=ingress_placeholders,
                                  egress_dequeue=egress_dequeue)
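# The ClientSlot tuples yielded above pair a set of ingress placeholders with the matching
# egress dequeue op. A minimal usage sketch (hypothetical driver code; `sess`, `slot`, and
# `request_batch` are assumptions, not part of this module):
def _drive_client_slot_sketch(sess, slot, request_batch):
    # request_batch: one array-like per ingress placeholder, with a leading batch dimension
    feed_dict = dict(zip(slot.ingress_placeholders, request_batch))
    # running the egress dequeue also triggers the ingress enqueue it depends on,
    # then blocks until the pipeline has produced the results for this request
    return sess.run(slot.egress_dequeue, feed_dict=feed_dict)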