示例#1
0
def test_write(N, fname):
    """Send ``N`` identical ``mapItem`` commands to ``fname`` via a coroutine.

    The file is wrapped in a ``BinaryWriter`` and a generator-based
    coroutine forwards every ``(key, val)`` pair it receives to the
    writer as a ``mapItem`` command.

    Args:
        N: number of ``("key", "val")`` pairs to push through the coroutine.
        fname: path of the output file; it is truncated if it exists.
    """
    # Binary mode: the writer's output is presumably raw bytes, so avoid
    # newline translation and text encoding (text mode rejects bytes on
    # Python 3).
    with open(fname, 'wb') as f:
        writer = BinaryWriter(f)

        def foo():
            # Look the bound method up once, outside the receive loop.
            writer_send = writer.send
            while True:
                key, val = yield True
                writer_send("mapItem", key, val)

        foo_fg = foo()
        # Prime the coroutine so it is suspended at its first ``yield``
        # and ready to accept send(). The builtin next() works on both
        # Python 2.6+ and Python 3, unlike the ``.next()`` method.
        next(foo_fg)
        for _ in range(N):
            foo_fg.send(("key", "val"))
示例#2
0
def stream_writer(fname, data):
    """Write a sequence of commands to ``fname`` through a ``BinaryWriter``.

    Args:
        fname: path of the output file; it is truncated if it exists.
        data: iterable of indexable sequences. The first element of each
            is the command and the remaining elements are passed to
            ``BinaryWriter.send`` as positional arguments.
    """
    # Binary mode: the writer's output is presumably raw bytes, so avoid
    # newline translation and text encoding (text mode rejects bytes on
    # Python 3).
    with open(fname, 'wb') as f:
        bw = BinaryWriter(f)
        for vals in data:
            cmd, args = vals[0], vals[1:]
            bw.send(cmd, *args)
示例#3
0
def write_data(N, fname):
    """Write ``N`` identical ``mapItem`` commands followed by ``close``.

    Args:
        N: number of ``mapItem`` commands (each with "key" / "val") to send.
        fname: path of the output file; it is truncated if it exists.
    """
    # Binary mode: the writer's output is presumably raw bytes, so avoid
    # newline translation and text encoding (text mode rejects bytes on
    # Python 3).
    with open(fname, 'wb') as f:
        writer = BinaryWriter(f)
        for _ in range(N):
            writer.send('mapItem', "key", "val")
        writer.send('close')
示例#4
0
 def __write_cmd_file(self, mode):
     """Write the command file fed to the map task and return its path.

     The file is created at ``self._mkfn('map_in')`` and receives, in
     order: a start message, the job configuration, a run-map command,
     the input types, one map item per element of ``self.records`` and a
     final close command.

     Args:
         mode: 'K' to send each serialized record as the map item key
             (with 'v' as the value), 'V' to send it as the value (with
             'k' as the key).

     Returns:
         The path of the command file that was written.

     Raises:
         RuntimeError: if ``mode`` is neither 'K' nor 'V'.
     """
     if mode not in ('K', 'V'):
         # FIXME: add support for 'KV'
         raise RuntimeError("Mode %r not supported" % (mode, ))
     record_is_key = (mode == 'K')
     if record_is_key:
         schema_prop = AVRO_KEY_INPUT_SCHEMA
     else:
         schema_prop = AVRO_VALUE_INPUT_SCHEMA
     cmd_fn = self._mkfn('map_in')
     serializer = AvroSerializer(self.schema)
     with open(cmd_fn, 'wb') as out:
         writer = BinaryWriter(out)
         writer.send(writer.START_MESSAGE, 0)
         # Job configuration is sent as a flat run of name/value strings.
         job_conf = (
             AVRO_INPUT, mode,
             schema_prop, str(self.schema),
             'mapreduce.pipes.isjavarecordreader', 'true',
             'mapreduce.pipes.isjavarecordwriter', 'true',
         )
         writer.send(writer.SET_JOB_CONF, *job_conf)
         writer.send(writer.RUN_MAP, 'input_split', 0, True)
         writer.send(writer.SET_INPUT_TYPES, 'key_type', 'value_type')
         for record in self.records:
             payload = serializer.serialize(record)
             # The serialized record fills the slot selected by ``mode``;
             # the other slot gets a fixed placeholder string.
             if record_is_key:
                 key, value = payload, 'v'
             else:
                 key, value = 'k', payload
             writer.send(writer.MAP_ITEM, key, value)
         writer.send(writer.CLOSE)
         writer.close()
     return cmd_fn
示例#5
0
 def __write_cmd_file(self, mode):
     """Write the command file fed to the map task and return its path.

     The file is created at ``self._mkfn('map_in')`` and receives, in
     order: 'start', 'setJobConf', 'setInputTypes', 'runMap', one
     'mapItem' per element of ``self.records`` and a final 'close'.

     Args:
         mode: 'K' to send each serialized record as the map item key
             (with 'v' as the value), 'V' to send it as the value (with
             'k' as the key).

     Returns:
         The path of the command file that was written.

     Raises:
         RuntimeError: if ``mode`` is neither 'K' nor 'V'.
     """
     if mode not in ('K', 'V'):
         # FIXME: add support for 'KV'
         raise RuntimeError("Mode %r not supported" % (mode,))
     schema_prop = pydoop.PROPERTIES[
         'AVRO_%s_INPUT_SCHEMA' % ('KEY' if mode == 'K' else 'VALUE')
     ]
     cmd_fn = self._mkfn('map_in')
     serializer = AvroSerializer(self.schema)
     # Binary mode: the serialized records are written as part of the
     # stream, so avoid newline translation and text encoding.
     with open(cmd_fn, 'wb') as f:
         bwriter = BinaryWriter(f)
         bwriter.send('start', 0)
         # The job configuration is deliberately passed as ONE tuple
         # argument, exactly as before; only the stray trailing comma
         # after the call (which built a throwaway 1-tuple) was removed.
         bwriter.send('setJobConf', (
             pydoop.PROPERTIES['AVRO_INPUT'], mode,
             schema_prop, str(self.schema)
         ))
         bwriter.send('setInputTypes', 'key_type', 'value_type')
         bwriter.send('runMap', 'input_split', 0, False)
         for r in self.records:
             if mode == 'K':
                 bwriter.send('mapItem', serializer.serialize(r), 'v')
             else:
                 bwriter.send('mapItem', 'k', serializer.serialize(r))
         bwriter.send('close')
     return cmd_fn