Example #1
0
def writeData(writer, filename, data):
    """Append a single (filename, data) record to an open SequenceFile writer."""
    record_key = Text()
    record_key.set(filename)

    record_value = BytesWritable()
    record_value.set(data)

    writer.append(record_key, record_value)
Example #2
0
def convert_to_sequencefiles(cpp_encrypted_data):
    """Convert each C++-generated partition under *cpp_encrypted_data* to
    Hadoop SequenceFile format, deleting the C++ original afterwards.

    cpp_encrypted_data: directory whose "data/" subdirectory contains the
        C++ output partitions, named "cpp-part*".

    Each output SequenceFile holds one record: key IntWritable(0), value
    the raw bytes of the partition file.
    """
    # Get all data files outputted by C++
    partition_pattern = os.path.join(cpp_encrypted_data, "data/cpp-part*")
    partition_files = glob.glob(partition_pattern)

    # Convert each partition to SequenceFile format
    for partition_file in partition_files:
        # FIXME: should we stream this so we don't load an entire ~1 GB
        # partition into memory at once?
        with open(partition_file, "rb") as partition:
            partition_data = partition.read()

        # Strip the "cpp-" prefix from the basename only.  A plain
        # str.replace on the full path (the previous approach) also
        # mangled any directory component that happened to contain "cpp-".
        dirname, basename = os.path.split(partition_file)
        if basename.startswith("cpp-"):
            basename = basename[len("cpp-"):]
        output_partition_file = os.path.join(dirname, basename)
        sequence_file_writer = SequenceFile.createWriter(
            output_partition_file, IntWritable, BytesWritable)

        key = IntWritable()
        value = BytesWritable()

        key.set(0)
        value.set(partition_data)

        sequence_file_writer.append(key, value)
        sequence_file_writer.close()

        # Remove temporary file generated by C++
        os.remove(partition_file)
Example #3
0
def sequence(file_out, s3_files_in, make_key, tempvaluefile="/tmp/temp.nc"):
    """Write a set of S3-hosted files into a single Hadoop SequenceFile.

    file_out: string path of the SequenceFile to create.
    s3_files_in: list of S3 paths to read.  Each file becomes one
        (key, value) record: key = make_key(cube), value = the raw bytes
        of the downloaded file.
    make_key: function which takes an iris cube and returns a uid string.
    tempvaluefile: local scratch path every S3 file is downloaded to
        (overwritten on each iteration).

    NOTE(review): duplicates are tracked via str(c.metadata) while the
    record key written is make_key(c); if make_key is not derived from
    the metadata, the overwrite warning may not match real key
    collisions — confirm against callers.
    """
    keys_done = []

    writer = SequenceFile.createWriter(file_out, Text, BytesWritable)
    for s3_file_in in s3_files_in:
        # Download to the scratch file; here `f` is the local path string.
        f = get_s3_file(s3_file_in, tempvaluefile)
        c = iris.load_cube(f)
        key_writer = Text()

        # Warn (but still overwrite) when a cube with identical metadata
        # was already written in this run.
        if (str(c.metadata) in keys_done):
            warnings.warn("Key for file " + f +
                          " already present - overwriting")
        key_writer.set(make_key(c))
        keys_done.append(str(c.metadata))

        value_writer = BytesWritable()
        # NOTE: `f` is rebound here from the path string to a file object.
        with open(tempvaluefile, "rb") as f:
            print s3_file_in
            value_writer.set(f.read())
            writer.append(key_writer, value_writer)
    writer.close()
Example #4
0
def sequence(file_out, s3_files_in, make_key, tempvaluefile="/tmp/temp.nc"):
    """Encode a list of S3 files into one Hadoop SequenceFile.

    file_out: string path of the SequenceFile to write.
    s3_files_in: list of S3 paths; each file is written as a separate
        (key, value) record with key = make_key(cube) and value = the raw
        bytes of the downloaded file.
    make_key: function which takes an iris cube and returns a uid string.
    tempvaluefile: local scratch path each S3 file is downloaded to
        before being read back (reused/overwritten per iteration).

    NOTE(review): the duplicate check keys on str(c.metadata) but the
    record is keyed on make_key(c); these only agree if make_key derives
    its uid from the metadata — verify.
    """
    keys_done = []
    
    writer = SequenceFile.createWriter(file_out, Text, BytesWritable)
    for s3_file_in in s3_files_in:
        # `f` is the local path of the downloaded copy at this point.
        f = get_s3_file(s3_file_in, tempvaluefile)
        c = iris.load_cube(f)
        key_writer = Text()
        
        # Duplicate metadata only produces a warning; the record is still
        # appended (i.e. the earlier record is effectively superseded).
        if (str(c.metadata) in keys_done):
            warnings.warn("Key for file "+f+" already present - overwriting")
        key_writer.set(make_key(c))
        keys_done.append(str(c.metadata))
        
        value_writer = BytesWritable()
        # `f` is rebound from the path string to an open file object here.
        with open(tempvaluefile, "rb") as f:
            print s3_file_in
            value_writer.set(f.read())
            writer.append(key_writer, value_writer)
    writer.close()
def writeData(writer):
    key = BytesWritable()
    value = BytesWritable()

    # for i in xrange(1000):
    key.set("A")
    value.set("B")
    print '[%d] %s %s' % (writer.getLength(), key.toString(), value.toString())
    writer.append(key, value)
Example #6
0
 def writeData(self, key, value):
     """Append (key, value) to the current writer, first rotating the
     writer when the calendar day or hour differs from self.file_time."""
     now = time.localtime(time.time())
     day_changed = now.tm_mday != self.file_time.tm_mday
     hour_changed = now.tm_hour != self.file_time.tm_hour
     if day_changed or hour_changed:
         # Swap in a fresh writer; create_writer's exact rollover
         # semantics are defined elsewhere in the class.
         self.writer = self.create_writer(self.writer)
     out_key = Text()
     out_value = BytesWritable()
     out_key.set(key)
     out_value.set(value)
     self.writer.append(out_key, out_value)
def write_seq_file(file_name, data_dict):
    writer = SequenceFile.createWriter(file_name, Text, BytesWritable)
    for key, value in data_dict.iteritems():
        print key, ", " ,
        key_writer = Text()
        key_writer.set(key)
        
        value_writer = BytesWritable()
        iris.save(value, "temp.nc")
        with open("temp.nc", "rb") as f:
            value_writer.set(f.read())
        writer.append(key_writer, value_writer)
    writer.close()
def write_seq_file(file_name, data_dict):
    writer = SequenceFile.createWriter(file_name, Text, BytesWritable)
    for key, value in data_dict.iteritems():
        print key, ", ",
        key_writer = Text()
        key_writer.set(key)

        value_writer = BytesWritable()
        iris.save(value, "temp.nc")
        with open("temp.nc", "rb") as f:
            value_writer.set(f.read())
        writer.append(key_writer, value_writer)
    writer.close()
def writeData(writer):
    key = BytesWritable()
    value = BytesWritable()

    # for i in xrange(1000):
    key.set("A")
    value.set("B")
    print '[%d] %s %s' % (writer.getLength(), key.toString(), value.toString())
    writer.append(key, value)
Example #10
0
def importSGY(sgyFilename, rddFilename):
    """Import a SEG-Y seismic file into a Hadoop SequenceFile.

    sgyFilename: path of the SEG-Y input file.
    rddFilename: path of the SequenceFile to create; one record per
        trace, keyed by the trace number.

    Each value is the 240-byte trace header concatenated with the
    decoded trace samples (both stringified).
    """
    writer = SequenceFile.createWriter(rddFilename, IntWritable, BytesWritable)

    SH = segypy.getSegyHeader(sgyFilename, 3600, segypy.endian)
    bps = segypy.getBytePerSample(SH)

    filesize = os.path.getsize(sgyFilename)
    samp_count = SH['ns']          # samples per trace
    data_len = samp_count * bps    # bytes of sample data per trace
    trace_size = data_len + 240    # plus the fixed 240-byte trace header
    # Floor division keeps the trace count integral on Python 2 and 3.
    ntraces = (filesize - 3600) // trace_size

    # `with` guarantees the input file is closed even if decoding fails
    # (the original opened it and never closed it).
    with open(sgyFilename, 'rb') as fp:
        fp.read(3600)  # skip the 3600-byte textual + binary file header
        for trace_num in range(ntraces):
            SegyTraceHeader = fp.read(240)
            SegyTraceData = fp.read(data_len)
            # FIXME(review): stray note in the original said
            # "segypy.getValue is not correct" — verify this decode.
            SegyTraceData = segypy.getValue(
                SegyTraceData, 0, 'float', segypy.endian, samp_count)
            writer.append(IntWritable(trace_num), BytesWritable(
                str(SegyTraceHeader) + str(SegyTraceData)))