示例#1
0
    def make_central_pipeline(self, inputs, local_head_gate):
        """Build the central sort pipeline: decompress -> pre-sort gate -> sort -> pre-write queue.

        :param inputs: iterable of upstream tensor tuples; each item's first
            element is id_and_count and the rest are payload components
            (presumably — confirm against the caller)
        :param local_head_gate: upstream gate used as the credit supplier for
            the pre-sort gate
        :return: (id_and_count, record_id, intermediate_name, superchunk_num_records, superchunk_matrix) + rest_of_input
        """
        inputs = sanitize_generator(inputs)

        # Funnel all upstream inputs into the decompression stage.
        queue_name = "sort_ready_to_decomp"
        ready_to_decomp = pipeline.join(upstream_tensors=inputs,
                                        parallel=self.decompress_parallel,
                                        capacity=self.pre_decomp_capacity,
                                        multi=True,
                                        name=queue_name,
                                        shared_name=queue_name)
        with tf.name_scope("decompression_stage"):
            ready_to_sort_items = sanitize_generator(
                self.make_decomp_stage(ready_to_decomp=ready_to_decomp))
        # Raise instead of assert so the check survives `python -O`.
        if not ready_to_sort_items:
            raise ValueError("decompression stage produced no outputs")

        # Credit-limited gate between decompression and sorting; credits are
        # supplied from the local head gate.
        queue_name = "pre_sort_gate"
        example_item = ready_to_sort_items[0]
        pre_sort_gate = gate.StreamingGate(
            name=queue_name,
            shared_name=queue_name,
            id_and_count_upstream=example_item[0],
            sample_tensors=example_item[1:],
            capacity=self.pre_sort_gate_capacity,
            limit_upstream=True,
            limit_downstream=False)
        gate.add_credit_supplier_from_gates(upstream_gate=local_head_gate,
                                            downstream_gate=pre_sort_gate)

        enqueue_ops = tuple(
            pre_sort_gate.enqueue(id_and_count=a[0], components=a[1:])
            for a in ready_to_sort_items)
        gate.add_gate_runner(gate_runner=gate.GateRunner(
            gate=pre_sort_gate, enqueue_ops=enqueue_ops))

        # One batched dequeue per parallel sorter.
        to_sort_ops = tuple(
            pre_sort_gate.dequeue_many(count=self.sort_batch)
            for _ in range(self.sort_parallel))

        with tf.name_scope("sort_stage"):
            # Renamed from 'sorted' to avoid shadowing the builtin.
            sort_results = tuple(self.make_sort_stage(ready_to_sort=to_sort_ops))
        sorted_chunks, control_deps = zip(*sort_results)

        # Queue the sorted chunks for the writer stage.
        queue_name = "sort_ready_to_write"
        ready_to_write = pipeline.join(upstream_tensors=sorted_chunks,
                                       control_dependencies=control_deps,
                                       parallel=self.write_parallel,
                                       multi=True,
                                       capacity=self.pre_write_capacity,
                                       name=queue_name,
                                       shared_name=queue_name)

        return ready_to_write
示例#2
0
    def make_central_pipeline(self, read_columns, head_gate):
        """Build the central merge pipeline: (index) -> barrier gate -> merge -> compress -> pre-write queue.

        :param read_columns: a generator of (id_and_count, ([ list, of, file, mmap, handles, ... ], {pass around}))
        :param head_gate: upstream gate acting as the credit supplier for the barrier gate
        :return: a generator of (id_and_count, (chunk_matrix, record_id, {pass around})
        """
        read_columns = sanitize_generator(read_columns)

        # When ordering by location, an index-building stage is inserted first.
        if self.order_by == location_value:
            read_columns = sanitize_generator(
                self.make_index_building_stage(read_columns=read_columns))

        # a gen of (id_and_count, components)
        # components = ([ handles, columns ])
        gate_name = "pre_merge_barrier_gate"
        first_idc, first_components = read_columns[0]
        pre_merge_gate = gate.StreamingGate(
            name=gate_name,
            shared_name=gate_name,
            id_and_count_upstream=first_idc,
            sample_tensors=first_components,
            capacity=self.pre_merge_gate_capacity,
            limit_upstream=True,
            limit_downstream=False)
        gate.add_credit_supplier_from_gates(upstream_gate=head_gate,
                                            downstream_gate=pre_merge_gate)

        enqueue_ops = []
        for idc, comp in read_columns:
            enqueue_ops.append(
                pre_merge_gate.enqueue(id_and_count=idc, components=comp))
        gate.add_gate_runner(gate_runner=gate.GateRunner(
            gate=pre_merge_gate, enqueue_ops=tuple(enqueue_ops)))

        # One whole-dataset dequeue per parallel merger (kept lazy, as before).
        to_merge = (pre_merge_gate.dequeue_whole_dataset()
                    for _ in range(self.merge_parallel))

        with tf.name_scope("merge_merge_stage"):
            to_compress = tuple(self.make_merge_stage(merge_batches=to_merge))

        with tf.name_scope("merge_compress_stage"):
            compressed = tuple(
                self.make_compress_stage(
                    to_compress=to_compress))  # returns a generator

        # Split the (tensors, control_dep) pairs into parallel tuples.
        control_deps = tuple(item[1] for item in compressed)
        write_tensors = tuple(item[0] for item in compressed)

        gate_name = "merge_pre_write_queue"
        return pipeline.join(upstream_tensors=write_tensors,
                             control_dependencies=control_deps,
                             parallel=self.write_parallel,
                             capacity=self.pre_write_capacity,
                             multi=True,
                             name=gate_name,
                             shared_name=gate_name)
示例#3
0
 def make_head_gate(self, upstream_gate):
     """Create the head gate fed by one partition dequeued from *upstream_gate*."""
     idc, comps = upstream_gate.dequeue_partition(count=self.global_batch)
     name = "_".join((self.local_dest, "head_gate"))
     # Both credit limits are off: only one head gate is needed, so no
     # credit control is necessary on either side.
     head = gate.StreamingGate(limit_upstream=False,
                               limit_downstream=False,
                               id_and_count_upstream=idc,
                               sample_tensors=comps,
                               sample_tensors_are_batch=True,
                               capacity=2,
                               name=name, shared_name=name)
     enq_ops = (head.enqueue_many(id_and_count=idc, components=comps),)
     gate.add_gate_runner(gate_runner=gate.GateRunner(gate=head, enqueue_ops=enq_ops))
     return head
示例#4
0
 def make_local_gate(upstream_gate):
     """Create an uncredited local gate fed from a single upstream partition."""
     id_and_count, components = upstream_gate.dequeue_partition(count=1)
     local_gate = gate.StreamingGate(limit_upstream=False,
                                     limit_downstream=False,
                                     id_and_count_upstream=id_and_count,
                                     sample_tensors=components,
                                     sample_tensors_are_batch=True)
     enqueue = local_gate.enqueue_many(id_and_count=id_and_count,
                                       components=components)
     gate.add_gate_runner(
         gate_runner=gate.GateRunner(gate=local_gate, enqueue_ops=(enqueue, )))
     return local_gate
示例#5
0
 def make_head_gate(self, upstream_gate):
     """Create the merge head gate from a whole upstream partition."""
     idc, comps = upstream_gate.dequeue_whole_partition()
     name = "merge_head_gate"
     head = gate.StreamingGate(
         # upstream comes from the central queue, so it is not credit-limited
         limit_upstream=False,
         # downstream (the batching join gate) is limited so it doesn't
         # suck up all resources
         limit_downstream=True,
         id_and_count_upstream=idc,
         sample_tensors=comps,
         name=name,
         shared_name=name,
         capacity=self.head_gate_capacity,
         sample_tensors_are_batch=True)
     enqueue = head.enqueue_many(id_and_count=idc, components=comps)
     gate.add_gate_runner(
         gate_runner=gate.GateRunner(gate=head, enqueue_ops=(enqueue, )))
     return head
示例#6
0
    def _construct_graph(self, args, device_map, num_client_slots):
        """Build the full ingress -> align/sort -> merge -> egress graph.

        This is a generator: it yields one ClientSlot (ingress placeholders +
        egress dequeue op) per client slot, so nothing is constructed until it
        is iterated.

        :param args: parsed argument namespace (stage counts, credit link mode, ...);
            ``args.parallel_open_requests`` is written back with the derived capacity
        :param device_map: maps stage keys to iterables of device context factories
        :param num_client_slots: number of client placeholder/dequeue pairs to create
        """
        gate_name = "ingress_gate"

        num_merge = args.merge_stages
        num_combo = args.combo_stages
        num_align = args.align_stages
        if (num_merge + num_combo) < 1:
            raise Exception("Need >0 merge stages. Got {m} pure merge and {c} combo".format(m=num_merge, c=num_combo))
        if (num_align + num_combo) < 1:
            raise Exception("Need >0 align stages. Got {a} pure align stages and {c} combo".format(a=num_align, c=num_combo))

        # Capacity either comes directly from args or is derived from the slot count.
        if args.parallel_open_requests is not None:
            capacity_between_gates = args.parallel_open_requests
        else:
            capacity_between_gates = int(num_client_slots * args.parallel_open_request_expansion_factor)
        if capacity_between_gates < 1:
            raise Exception("Capacity between gates is <1 ({c})".format(c=capacity_between_gates))
        args.parallel_open_requests = capacity_between_gates  # write back so later consumers see the derived value
        self.log.info("Capacity between gates: {}".format(capacity_between_gates))

        with tf.name_scope(gate_name):
            ingress = gate.IngressGate(dtypes=self.ingress_dtypes, shapes=self.ingress_shapes, capacity=capacity_between_gates,
                                       shared_name=gate_name, name=gate_name)

        with tf.name_scope("align_sort_stage"):
            align_stages = tuple(fused_align_sort.LocalFusedStage(args=args) for _ in range(num_align))
            small_align_stages = tuple(fused_align_sort.SmallLocalFusedStage(args=args) for _ in range(num_combo))
            def make_align_stages(stages, align_devices):
                # BUGFIX: iterate the 'stages' parameter. The previous version
                # closed over 'align_stages', so the combo (small) stages were
                # never built and combo devices were zipped against the pure
                # align stages instead.
                for stage, device in zip(stages, align_devices):
                    with device():
                        device_graph = stage.make_graph(upstream_gate=ingress)
                        try: # convert to a tuple if it returns a generator
                            device_graph[0]
                        except TypeError:
                            device_graph = tuple(device_graph)
                        assert len(stage.run_first) > 0
                        for item in stage.run_first:
                            self._add_run_first(tensor=item)
                        yield device_graph
            outputs = tuple(itertools.chain.from_iterable(
                make_align_stages(stages=s, align_devices=devices) for s, devices in (
                    (align_stages, device_map.get(align_sort_key, None)),
                    (small_align_stages, device_map.get(combo_key, None))
                ) if devices is not None
            ))
        assert len(outputs) == num_align + num_combo, "Expected {e} align stage ({a} pure align and {c} combo) but only got {actual}".format(
            e=num_align+num_combo, a=num_align, c=num_combo, actual=len(outputs))

        outputs = tuple(itertools.chain.from_iterable(outputs)) # flattens it
        example_output = outputs[0]
        if args.credit_link == credit_link_end_to_end:
            # End-to-end crediting: intermediate gates are unlimited.
            merge_gate_kwargs = {
                "limit_upstream": False,
                "limit_downstream": False
            }
        else:
            merge_gate_kwargs = {
                "capacity": capacity_between_gates
            }
        with tf.name_scope("inter_stage_gate"):
            gate_name = "ready_to_merge"
            # Slices a[1:-1] drop the id_and_count (front) and the per-stage
            # record counter (back) from the components.
            merge_gate = gate.StreamingGate(
                sample_tensors=example_output[1:-1],
                id_and_count_upstream=example_output[0], join=True,
                name=gate_name, shared_name=gate_name,
                **merge_gate_kwargs
            )
            enqueue_ops = tuple(merge_gate.enqueue(id_and_count=a[0], components=a[1:-1]) for a in outputs)
            if args.align_counters:
                if getattr(args, "summary", False):
                    with tf.name_scope(None): # clears this out of the inter_stage_gate scope
                        with tf.name_scope(performance_name_scope):
                            enqueue_ops = tuple(make_counter(counter_name="sorted_counter",
                                                             summary_name="sorted_num_records",
                                                             deps_and_counters=zip(
                                                                 enqueue_ops,
                                                                 (a[-1] for a in outputs)
                                                             )))
                else:
                    self.log.warning("Align counters requested, but no summary was requested. Please enable summary for this to work.")

            gate.add_gate_runner(gate_runner=gate.GateRunner(gate=merge_gate, enqueue_ops=enqueue_ops, device=merge_gate.device))
            if args.credit_link == credit_link_successive:
                gate.add_credit_supplier_from_gates(upstream_gate=ingress, downstream_gate=merge_gate)

        with tf.name_scope("merge_stage"):
            merge_stages = tuple(merge_stage.LocalMergeStage(args=args) for _ in range(num_merge))
            small_merge_stages = tuple(merge_stage.SmallLocalMergeStage(args=args) for _ in range(num_combo))

            def make_merge_stages(stages, merge_devices):
                # BUGFIX: iterate 'stages', not the closed-over 'merge_stages'
                # (same defect as make_align_stages above).
                for stage, device in zip(stages, merge_devices):
                    with device():
                        device_graph = stage.make_graph(upstream_gate=merge_gate)
                        try:  # convert to a tuple if it returns a generator
                            device_graph[0]
                        except TypeError:
                            device_graph = tuple(device_graph)
                        yield device_graph

            merge_stage_outputs = tuple(itertools.chain.from_iterable(
                make_merge_stages(stages=s, merge_devices=devices) for s, devices in (
                    (merge_stages, device_map.get(merge_key, None)),
                    (small_merge_stages, device_map.get(combo_key, None))
                ) if devices is not None
            ))
        # BUGFIX: message previously contained a stray '}' ("combo}") which
        # would raise ValueError from str.format if the assert ever fired.
        assert len(merge_stage_outputs) == num_merge + num_combo, "Expected {e} merge devices ({p} pure merge and {c} combo), but only got {actual}".format(
            p=num_merge, c=num_combo, e=num_merge+num_combo, actual=len(merge_stage_outputs)
        )

        merge_stage_outputs = tuple(itertools.chain.from_iterable(merge_stage_outputs)) # flattens it
        example_output = merge_stage_outputs[0]
        gate_name = "egress_gate"
        with tf.name_scope(gate_name):
            egress = gate.EgressGate(capacity=capacity_between_gates, sample_tensors=example_output[1:],
                                     id_and_count_upstream=example_output[0], join=True,
                                     name=gate_name, shared_name=gate_name)
            enqueue_ops = tuple(egress.enqueue(id_and_count=a[0], components=a[1:]) for a in merge_stage_outputs)
            if args.merge_counters:
                if getattr(args, "summary", False):
                    with tf.name_scope(None):
                        with tf.name_scope(performance_name_scope):
                            enqueue_ops = tuple(make_counter(counter_name="merged_counter",
                                                             summary_name="merged_num_records",
                                                             deps_and_counters=zip(
                                                                 enqueue_ops,
                                                                 (a[3] for a in merge_stage_outputs)
                                                             )))
                else:
                    self.log.warning("Merge counters requested, but no summary was requested. Please enable summary for this to work")

            gate.add_gate_runner(gate_runner=gate.GateRunner(gate=egress, enqueue_ops=enqueue_ops, device=egress.device))
            if args.credit_link == credit_link_end_to_end:
                gate.add_credit_supplier_from_gates(upstream_gate=ingress, downstream_gate=egress)
            else:
                gate.add_credit_supplier_from_gates(upstream_gate=merge_gate, downstream_gate=egress)

        self.close_op = (ingress.close(), egress.close())

        with tf.name_scope("client_slots"):
            unknown_shape = tf.TensorShape([None])
            # Prepend a variable batch dimension to each ingress shape.
            batch_ingress_shapes = tuple(unknown_shape.concatenate(ishape) for ishape in self.ingress_shapes)
            for idx in range(num_client_slots):
                ingress_placeholders = tuple(tf.placeholder(dtype=dtype, shape=shape, name="client_slot_{}".format(idx)) for dtype, shape in zip(self.ingress_dtypes, batch_ingress_shapes))
                ingress_enqueue = ingress.enqueue_request(components=ingress_placeholders, name="ingress_enqueue_{}".format(idx))
                egress_dequeue = egress.dequeue_request(request_id=ingress_enqueue, name="egress_dequeue_{}".format(idx))
                yield self.ClientSlot(ingress_placeholders=ingress_placeholders, egress_dequeue=egress_dequeue)
示例#7
0
    def _construct_graph(self, args, device_map, num_client_slots):
        """Build the full ingress -> align -> sort -> merge -> egress graph.

        This is a generator: it yields one ClientSlot (ingress placeholders +
        egress dequeue op) per client slot, so nothing is constructed until it
        is iterated.

        :param args: parsed argument namespace (stage counts, credit link mode, ...);
            ``args.parallel_open_requests`` is written back with the derived capacity
        :param device_map: maps align/sort/merge keys to iterables of device
            context factories
        :param num_client_slots: number of client placeholder/dequeue pairs to create
        """
        gate_name = "ingress_gate"

        num_align = args.align_stages
        num_sort = args.sort_stages
        num_merge = args.merge_stages
        align_devices = device_map[align_key]
        sort_devices = device_map[sort_key]
        merge_devices = device_map[merge_key]

        # Capacity either comes directly from args or is derived from the slot count.
        if args.parallel_open_requests is not None:
            capacity_between_gates = args.parallel_open_requests
        else:
            capacity_between_gates = int(
                num_client_slots * args.parallel_open_request_expansion_factor)
        if capacity_between_gates < 1:
            raise Exception("Capacity between gates is <1 ({c})".format(
                c=capacity_between_gates))
        args.parallel_open_requests = capacity_between_gates  # write back the derived value
        self.log.info(
            "Capacity between gates: {}".format(capacity_between_gates))
        if args.credit_link == credit_link_end_to_end:
            # End-to-end crediting: intermediate gates are unlimited.
            inter_gate_kwargs = {
                "limit_upstream": False,
                "limit_downstream": False
            }
        else:
            inter_gate_kwargs = {"capacity": capacity_between_gates}

        def generate_local_pipelines(name, num_pipelines, devices,
                                     upstream_gate, pipeline_type):
            """Instantiate num_pipelines stages of pipeline_type, one per device,
            and yield each stage's (tuple-ified) graph outputs."""
            with tf.name_scope("{}_pipeline".format(name)):
                pipelines = tuple(
                    pipeline_type(args=args) for _ in range(num_pipelines))
                assert len(pipelines) == len(devices)
                for pipeline, device in zip(pipelines, devices):
                    with device():
                        device_graph = pipeline.make_graph(
                            upstream_gate=upstream_gate)
                    # NOTE(review): unlike the fused variant, the tuple
                    # conversion below happens outside the device scope —
                    # confirm this is intentional.
                    try:  # convert to a tuple if it returns a generator
                        device_graph[0]
                    except TypeError:
                        device_graph = tuple(device_graph)
                    run_first = pipeline.run_first
                    for item in run_first:
                        self._add_run_first(item)
                    yield device_graph

        with tf.name_scope(gate_name):
            ingress = gate.IngressGate(dtypes=self.ingress_dtypes,
                                       shapes=self.ingress_shapes,
                                       capacity=capacity_between_gates,
                                       shared_name=gate_name,
                                       name=gate_name)

        align_outputs = tuple(
            generate_local_pipelines(name="align",
                                     devices=align_devices,
                                     upstream_gate=ingress,
                                     pipeline_type=snap_align.CephSnapStage,
                                     num_pipelines=num_align))
        assert len(align_outputs) == num_align

        with tf.name_scope("sort_gate"):
            gate_name = "global_sort_gate"
            example_output = align_outputs[0]
            sort_gate = gate.StreamingGate(
                sample_tensors=example_output[4:6],
                id_and_count_upstream=example_output[0],
                join=True,
                name=gate_name,
                shared_name=gate_name,
                **inter_gate_kwargs)
            # 4 and 5 are key and namespace
            enqueue_ops = tuple(
                sort_gate.enqueue(id_and_count=a[0], components=a[4:6])
                for a in align_outputs)

            if args.align_counters:
                if getattr(args, "summary", False):
                    with tf.name_scope(None):
                        with tf.name_scope(performance_name_scope):
                            enqueue_ops = tuple(
                                make_counter(
                                    counter_name="aligned_counter",
                                    summary_name="aligned_num_records",
                                    deps_and_counters=zip(
                                        enqueue_ops,
                                        (a[3] for a in align_outputs))))
                else:
                    self.log.warning(
                        "Align counters requested, but no summary was requested. Please enable summary for this to work"
                    )
            gate.add_gate_runner(
                gate_runner=gate.GateRunner(gate=sort_gate,
                                            enqueue_ops=enqueue_ops,
                                            device=sort_gate.device))
            if args.credit_link == credit_link_successive:
                gate.add_credit_supplier_from_gates(upstream_gate=ingress,
                                                    downstream_gate=sort_gate)

        sort_outputs = tuple(
            generate_local_pipelines(name="sort",
                                     devices=sort_devices,
                                     upstream_gate=sort_gate,
                                     pipeline_type=sort_stage.CephSort,
                                     num_pipelines=num_sort))
        assert len(sort_outputs) == num_sort
        sort_outputs = tuple(itertools.chain.from_iterable(sort_outputs))

        with tf.name_scope("merge_gate"):
            gate_name = "global_merge_gate"
            example_output = sort_outputs[0]
            merge_gate = gate.StreamingGate(
                sample_tensors=example_output[1:-1],  # slice off the id
                id_and_count_upstream=example_output[0],
                join=True,
                name=gate_name,
                shared_name=gate_name,
                **inter_gate_kwargs)
            enqueue_ops = tuple(
                merge_gate.enqueue(id_and_count=a[0], components=a[1:-1])
                for a in sort_outputs)

            # NOTE(review): the *sorted* counter is guarded by
            # args.align_counters here — possibly a dedicated sort_counters
            # flag was intended; confirm before changing.
            if args.align_counters:
                if getattr(args, "summary", False):
                    with tf.name_scope(None):
                        with tf.name_scope(performance_name_scope):
                            enqueue_ops = tuple(
                                make_counter(counter_name="sorted_counter",
                                             summary_name="sorted_num_records",
                                             deps_and_counters=zip(
                                                 enqueue_ops,
                                                 (a[3]
                                                  for a in sort_outputs))))
                else:
                    self.log.warning(
                        "Sort counters requested, but no summary was requested. Please enable summary for this to work"
                    )
            gate.add_gate_runner(
                gate_runner=gate.GateRunner(gate=merge_gate,
                                            enqueue_ops=enqueue_ops,
                                            device=merge_gate.device))
            if args.credit_link == credit_link_successive:
                gate.add_credit_supplier_from_gates(upstream_gate=sort_gate,
                                                    downstream_gate=merge_gate)

        merge_outputs = tuple(
            generate_local_pipelines(name="merge",
                                     devices=merge_devices,
                                     upstream_gate=merge_gate,
                                     pipeline_type=merge_stage.CephMergeStage,
                                     num_pipelines=num_merge))
        assert len(merge_outputs) == num_merge
        merge_outputs = tuple(itertools.chain.from_iterable(merge_outputs))

        example_output = merge_outputs[0]
        gate_name = "egress_gate"
        with tf.name_scope(gate_name):
            egress = gate.EgressGate(
                capacity=capacity_between_gates,
                sample_tensors=example_output.components,
                id_and_count_upstream=example_output.id_and_count,
                join=True,
                name=gate_name,
                shared_name=gate_name)
            enqueue_ops = tuple(
                egress.enqueue(id_and_count=a.id_and_count,
                               components=a.components) for a in merge_outputs)
            if args.merge_counters:
                if getattr(args, "summary", False):
                    with tf.name_scope(None):
                        with tf.name_scope(performance_name_scope):
                            enqueue_ops = tuple(
                                make_counter(counter_name="merged_counter",
                                             summary_name="merged_num_records",
                                             deps_and_counters=zip(
                                                 enqueue_ops,
                                                 (a.components[2]
                                                  for a in merge_outputs))))
                else:
                    self.log.warning(
                        "Merge counters requested, but no summary was requested. Please enable summary for this to work"
                    )

            gate.add_gate_runner(gate_runner=gate.GateRunner(
                gate=egress, enqueue_ops=enqueue_ops, device=egress.device))
            if args.credit_link == credit_link_end_to_end:
                gate.add_credit_supplier_from_gates(upstream_gate=ingress,
                                                    downstream_gate=egress)
            else:
                gate.add_credit_supplier_from_gates(upstream_gate=merge_gate,
                                                    downstream_gate=egress)

        # NOTE(review): earlier revisions stored the close ops of all four
        # gates in close_op, then sliced it to [:1], then overwrote it with []
        # — three dead assignments whose net effect is an empty close_op. The
        # close ops are still constructed here so the graph is unchanged.
        ingress.close()
        sort_gate.close()
        merge_gate.close()
        egress.close()
        self.close_op = []

        with tf.name_scope("client_slots"):
            unknown_shape = tf.TensorShape([None])
            # Prepend a variable batch dimension to each ingress shape.
            batch_ingress_shapes = tuple(
                unknown_shape.concatenate(ishape)
                for ishape in self.ingress_shapes)
            for idx in range(num_client_slots):
                ingress_placeholders = tuple(
                    tf.placeholder(dtype=dtype,
                                   shape=shape,
                                   name="client_slot_{}".format(idx))
                    for dtype, shape in zip(self.ingress_dtypes,
                                            batch_ingress_shapes))
                ingress_enqueue = ingress.enqueue_request(
                    components=ingress_placeholders,
                    name="ingress_enqueue_{}".format(idx))
                egress_dequeue = egress.dequeue_request(
                    request_id=ingress_enqueue,
                    name="egress_dequeue_{}".format(idx))
                yield self.ClientSlot(
                    ingress_placeholders=ingress_placeholders,
                    egress_dequeue=egress_dequeue)