示例#1
0
def test_run_write_carbon_binary():
    """Write ten rows carrying a JPEG payload and read them back.

    Every row holds the constant string "pycarbon", ``str(i)``,
    ``str(i * 10)`` and the raw bytes of ``carbondatalogo.jpg``.  The rows
    are then read back in batches; the test asserts that exactly ten rows
    are returned and dumps the large binary column of the first row to
    ``<path>/image.jpg``.

    The writer and reader are closed, and the output directory is removed,
    even when a step or the assertion fails.
    """
    jsonSchema = "[{stringField:string},{shortField:short},{intField:int},{binaryField:binary}]"
    path = "/tmp/data/writeCarbon" + str(time.time())

    if os.path.exists(path):
        shutil.rmtree(path)

    jpg_path = IMAGE_DATA_PATH + "/carbondatalogo.jpg"

    # The image is only read, so open it read-only: 'rb+' would needlessly
    # require write permission on the fixture file.
    with open(jpg_path, mode='rb') as file_object:
        content = file_object.read()

    try:
        writer = (CarbonWriter()
                  .builder()
                  .outputPath(path)
                  .withCsvInput(jsonSchema)
                  .writtenBy("pycarbon")
                  .build())
        try:
            # Resolved once, after the writer exists, instead of on every
            # loop iteration.
            from jnius import autoclass
            arrayListClass = autoclass("java.util.ArrayList")

            for i in range(10):
                data_list = arrayListClass()
                data_list.add("pycarbon")
                data_list.add(str(i))
                data_list.add(str(i * 10))
                data_list.add(content)
                writer.write(data_list.toArray())
        finally:
            writer.close()

        reader = (CarbonReader()
                  .builder()
                  .withFolder(path)
                  .withBatch(1000)
                  .build())
        try:
            from jnius.jnius import ByteArray

            row_count = 0
            while reader.hasNext():
                for row in reader.readNextBatchRow():
                    row_count += 1
                    # Only the first row's image is written back to disk.
                    if row_count != 1:
                        continue
                    for column in row:
                        if isinstance(column, ByteArray) and len(column) > 1000:
                            with open(path + "/image.jpg", 'wb+') as file_object:
                                file_object.write(column.tostring())
        finally:
            reader.close()

        assert 10 == row_count
    finally:
        # Always drop the temporary output so failed runs do not pile up
        # directories under /tmp/data.
        if os.path.exists(path):
            shutil.rmtree(path)
示例#2
0
def test_run_write_carbon_binary_base64_encode_decodeInJava_many_files():
    """Write one row per flower image, base64-encoded, and read them back.

    For every ``.jpg`` that ``SDKUtil.listFiles`` returns under
    ``IMAGE_DATA_PATH/flowers`` the test reads the image and the ``.txt``
    file of the same name, then writes a row whose binary column is the
    base64-encoded image (the writer is built with the ``binary_decoder``
    load option set to ``base64``).  While reading back, every large binary
    column of the first 19 rows is dumped to ``<path>/image<N>.jpg``.

    The test asserts that exactly three rows are read, so it presumably
    relies on the fixture folder holding three images -- verify against the
    test data.

    The writer and reader are closed, and the output directory is removed,
    even when a step or the assertion fails.
    """
    jsonSchema = "[{stringField:string},{shortField:short},{intField:int},{binaryField:binary},{txtField:string}]"
    path = "/tmp/data/writeCarbon" + str(time.time())

    if os.path.exists(path):
        shutil.rmtree(path)

    jpg_dir = IMAGE_DATA_PATH + "/flowers"

    from jnius import autoclass

    sdkUtilClass = autoclass("org.apache.carbondata.sdk.file.utils.SDKUtil")
    jpg_files = sdkUtilClass.listFiles(jpg_dir, '.jpg')

    try:
        writer = (CarbonWriter()
                  .builder()
                  .outputPath(path)
                  .withCsvInput(jsonSchema)
                  .writtenBy("pycarbon")
                  .withLoadOption("binary_decoder", "base64")
                  .withPageSizeInMb(1)
                  .build())
        try:
            # Resolved once instead of on every loop iteration.
            arrayListClass = autoclass("java.util.ArrayList")

            for i in range(0, jpg_files.size()):
                jpg_file = jpg_files.get(i)

                # Both fixture files are only read, so open them read-only
                # rather than in the update modes 'rb+' / 'r+'.
                with open(jpg_file, mode='rb') as file_object:
                    content = file_object.read()

                with open(str(jpg_file).replace('.jpg', '.txt'),
                          mode='r') as file_object:
                    txt = file_object.read()

                data_list = arrayListClass()
                data_list.add("pycarbon")
                data_list.add(str(i))
                data_list.add(str(i * 10))
                data_list.add(base64.b64encode(content))
                data_list.add(txt)
                writer.write(data_list.toArray())
        finally:
            writer.close()

        reader = (CarbonReader()
                  .builder()
                  .withFolder(path)
                  .withBatch(1000)
                  .build())
        try:
            from jnius.jnius import ByteArray

            row_count = 0
            while reader.hasNext():
                for row in reader.readNextBatchRow():
                    row_count += 1
                    # Images are dumped for the first 19 rows only.
                    if row_count >= 20:
                        continue
                    for column in row:
                        if isinstance(column, ByteArray) and len(column) > 1000:
                            with open(path + "/image" + str(row_count) + ".jpg",
                                      'wb+') as file_object:
                                file_object.write(column.tostring())
        finally:
            reader.close()

        assert 3 == row_count
    finally:
        # Always drop the temporary output so failed runs do not pile up
        # directories under /tmp/data.
        if os.path.exists(path):
            shutil.rmtree(path)
示例#3
0
def test_run_write_carbon():
    """Write rows twice into one folder and check row count and schema.

    Steps:
      1. Write ten (string, short, int) rows and assert that reading the
         folder back yields ten rows.
      2. Read the schema from the folder and assert it has three fields.
      3. Write ten more rows into the same folder with a fresh writer.
      4. Read the schema again, this time with ``getAsBuffer=False`` and
         ``validateSchema=True``, and assert it still has three fields.

    The writer and reader are closed, and the output directory is removed,
    even when a step or an assertion fails.
    """
    jsonSchema = "[{stringField:string},{shortField:short},{intField:int}]"
    path = "/tmp/data/writeCarbon" + str(time.time())

    def _write_ten_rows():
        """Build a fresh writer on ``path``, write ten rows, and close it."""
        writer = (CarbonWriter()
                  .builder()
                  .outputPath(path)
                  .withCsvInput(jsonSchema)
                  .writtenBy("pycarbon")
                  .build())
        try:
            # Resolved once per write pass instead of on every iteration.
            from jnius import autoclass
            arrayListClass = autoclass("java.util.ArrayList")

            for i in range(10):
                data_list = arrayListClass()
                data_list.add("pycarbon")
                data_list.add(str(i))
                data_list.add(str(i * 10))
                writer.write(data_list.toArray())
        finally:
            writer.close()

    if os.path.exists(path):
        shutil.rmtree(path)

    try:
        _write_ten_rows()

        reader = (CarbonReader()
                  .builder()
                  .withFolder(path)
                  .withBatch(1000)
                  .build())
        try:
            row_count = 0
            while reader.hasNext():
                row_count += len(reader.readNextBatchRow())
        finally:
            reader.close()

        assert 10 == row_count

        schema = CarbonSchemaReader().readSchema(path)
        assert 3 == schema.getFieldsLength()

        # Second write pass into the same folder, followed by a schema read
        # with validation switched on.
        _write_ten_rows()

        schema = CarbonSchemaReader().readSchema(getAsBuffer=False,
                                                 path=path,
                                                 validateSchema=True)
        assert 3 == schema.getFieldsLength()
    finally:
        # Always drop the temporary output so failed runs do not pile up
        # directories under /tmp/data.
        if os.path.exists(path):
            shutil.rmtree(path)