def get_shuffle_source_to_text_sink_message(shuffle_source_spec):
    """Build a lease response for a "shuffle source -> text sink" map task.

    Args:
      shuffle_source_spec: mapping of source spec property names to values.
        Every pair is converted with to_json_value and attached to the read
        instruction's source spec as an additional property.

    Returns:
      A dataflow.LeaseWorkItemResponse holding one WorkItem (id 1234,
      projectId 'project', jobId 'job') whose MapTask contains a read
      instruction followed by a write instruction.
    """
    # Read side: source spec populated from the caller-supplied mapping.
    source_spec = dataflow.Source.SpecValue()
    for name, value in shuffle_source_spec.iteritems():
        source_property = dataflow.Source.SpecValue.AdditionalProperty(
            key=name, value=to_json_value(value))
        source_spec.additionalProperties.append(source_property)
    read_instruction = dataflow.ReadInstruction(
        source=dataflow.Source(spec=source_spec))
    add_source_codec_spec(read_instruction)

    # Write side: sink spec populated from the module-level TEXT_SINK_SPEC.
    sink_spec = dataflow.Sink.SpecValue()
    for name, value in TEXT_SINK_SPEC.iteritems():
        sink_property = dataflow.Sink.SpecValue.AdditionalProperty(
            key=name, value=to_json_value(value))
        sink_spec.additionalProperties.append(sink_property)
    write_instruction = dataflow.WriteInstruction(
        input=dataflow.InstructionInput(),
        sink=dataflow.Sink(spec=sink_spec))
    add_sink_codec_spec(write_instruction)

    # The read instruction goes first, then the write instruction.
    map_task = dataflow.MapTask(instructions=[
        get_instruction_with_outputs(read=read_instruction),
        dataflow.ParallelInstruction(write=write_instruction),
    ])

    work_item = dataflow.WorkItem(
        id=1234, projectId='project', jobId='job', mapTask=map_task)
    return dataflow.LeaseWorkItemResponse(workItems=[work_item])
def get_in_memory_source_to_flatten_message():
    """Build a lease response for an "in-memory source -> flatten" map task.

    The read instruction's source spec is filled from the module-level
    IN_MEMORY_SOURCE_SPEC mapping (values converted with to_json_value).

    Returns:
      A dataflow.LeaseWorkItemResponse holding one WorkItem (id 1234,
      projectId 'project', jobId 'job') whose MapTask contains a read
      instruction followed by a flatten instruction.
    """
    read_instruction = dataflow.ReadInstruction(source=dataflow.Source())
    # The plain codec helper runs before the spec is attached; this ordering
    # is kept as-is because the helper's effects are not visible from here.
    add_source_codec_spec(read_instruction)
    read_instruction.source.spec = dataflow.Source.SpecValue()
    spec_properties = read_instruction.source.spec.additionalProperties
    for name, value in IN_MEMORY_SOURCE_SPEC.iteritems():
        spec_properties.append(
            dataflow.Source.SpecValue.AdditionalProperty(
                key=name, value=to_json_value(value)))
    # Note that the in-memory source spec requires a windowed coder.
    add_source_windowed_codec_spec(read_instruction)

    flatten_instruction = dataflow.FlattenInstruction(
        inputs=[dataflow.InstructionInput()])

    # The read instruction goes first, then the flatten instruction.
    map_task = dataflow.MapTask(instructions=[
        get_instruction_with_outputs(read=read_instruction),
        get_instruction_with_outputs(flatten=flatten_instruction),
    ])

    work_item = dataflow.WorkItem(
        id=1234, projectId='project', jobId='job', mapTask=map_task)
    return dataflow.LeaseWorkItemResponse(workItems=[work_item])
def get_text_source_to_shuffle_sink_message():
    """Build a lease response for a "text source -> ParDo -> shuffle sink" task.

    The source, DoFn and sink specs are filled from the module-level
    TEXT_SOURCE_SPEC, PARDO_DOFN_SPEC and SHUFFLE_SINK_SPEC mappings, with
    every value converted through to_json_value.

    Returns:
      A dataflow.LeaseWorkItemResponse holding one WorkItem (id 1234,
      projectId 'project', jobId 'job') whose MapTask contains, in order, a
      read instruction, a ParDo instruction and a write instruction.
    """
    # Read instruction: text source.
    source_spec = dataflow.Source.SpecValue()
    for name, value in TEXT_SOURCE_SPEC.iteritems():
        source_spec.additionalProperties.append(
            dataflow.Source.SpecValue.AdditionalProperty(
                key=name, value=to_json_value(value)))
    read_instruction = dataflow.ReadInstruction(
        source=dataflow.Source(spec=source_spec))
    add_source_codec_spec(read_instruction)

    # ParDo instruction with a single output tagged 'out'.
    # NOTE(review): producerInstructionIndex is 1 here as in the original;
    # left untouched because callers may depend on the exact value.
    pardo_instruction = dataflow.ParDoInstruction()
    pardo_instruction.input = dataflow.InstructionInput()
    pardo_instruction.input.producerInstructionIndex = 1
    pardo_instruction.input.outputNum = 0
    pardo_instruction.multiOutputInfos = [dataflow.MultiOutputInfo(tag='out')]
    pardo_instruction.userFn = dataflow.ParDoInstruction.UserFnValue()
    for name, value in PARDO_DOFN_SPEC.iteritems():
        pardo_instruction.userFn.additionalProperties.append(
            dataflow.ParDoInstruction.UserFnValue.AdditionalProperty(
                key=name, value=to_json_value(value)))

    # Write instruction: shuffle sink.
    write_instruction = dataflow.WriteInstruction()
    write_instruction.input = dataflow.InstructionInput()
    write_instruction.input.producerInstructionIndex = 1
    # Previously this section set outputNum on the ParDo input again and left
    # the write input's outputNum unset; set it explicitly on the write input.
    write_instruction.input.outputNum = 0
    write_instruction.sink = dataflow.Sink()
    write_instruction.sink.spec = dataflow.Sink.SpecValue()
    for name, value in SHUFFLE_SINK_SPEC.iteritems():
        write_instruction.sink.spec.additionalProperties.append(
            dataflow.Sink.SpecValue.AdditionalProperty(
                key=name, value=to_json_value(value)))
    add_sink_codec_spec(write_instruction)

    map_task = dataflow.MapTask()
    map_task.instructions.append(
        get_instruction_with_outputs(read=read_instruction))
    map_task.instructions.append(
        get_instruction_with_outputs(parDo=pardo_instruction))
    map_task.instructions.append(
        dataflow.ParallelInstruction(write=write_instruction))

    work_item = dataflow.WorkItem()
    work_item.id = 1234
    work_item.projectId = 'project'
    work_item.jobId = 'job'
    work_item.mapTask = map_task

    response = dataflow.LeaseWorkItemResponse()
    response.workItems.append(work_item)
    return response