Exemplo n.º 1
0
    def get(self) -> pd.DataFrame:
        """Download the dataset's data and decode it into a DataFrame.

        The namespace falls back to ``'_'`` and the version to ``'latest'``
        when the corresponding attribute is falsy. The response body is
        wrapped in a ``BytesIO`` and parsed with ``pandavro.from_avro``.
        """
        namespace = self.__namespace or '_'
        dataset = self.__dataset
        revision = self.__version or 'latest'

        # The leading empty segment yields the initial '/' of the path.
        path = '/'.join(['', 'datasets', namespace, dataset, 'versions', revision, 'data'])
        payload = client.get(path).content
        return pandavro.from_avro(BytesIO(payload))
Exemplo n.º 2
0
def test_buffer_e2e(dataframe):
    """Round-trip ``dataframe`` through Avro, reading back from a memory buffer.

    The frame is written to a temporary file by path, the file's bytes are
    loaded into a ``BytesIO``, and the frame decoded from that buffer must
    equal the input.
    """
    # The context manager closes the temporary file deterministically, even
    # if writing or reading raises.
    with NamedTemporaryFile() as tf:
        pdx.to_avro(tf.name, dataframe)
        # ``with open(...)`` already closes ``f``; no explicit close() needed.
        with open(tf.name, 'rb') as f:
            expect = pdx.from_avro(BytesIO(f.read()))
    assert_frame_equal(expect, dataframe)
Exemplo n.º 3
0
def test_delegation(dataframe):
    """Write ``dataframe`` to Avro by file path and read it back unchanged.

    The ``DateTime64`` column of the decoded frame is cast to
    ``datetime64[ns]`` before comparison (presumably because the round trip
    yields a different datetime dtype -- confirm against pandavro).
    """
    # Close the temporary file deterministically instead of leaking the
    # handle until garbage collection.
    with NamedTemporaryFile() as tf:
        pdx.to_avro(tf.name, dataframe)
        expect = pdx.from_avro(tf.name)
    expect['DateTime64'] = expect['DateTime64'].astype(
        np.dtype('datetime64[ns]'))
    assert_frame_equal(expect, dataframe)
Exemplo n.º 4
0
def test_append(dataframe):
    """Write ``dataframe`` in two halves (second with ``append=True``) and compare.

    The first half of the rows is written normally, the remainder is
    appended to the same file, and the frame read back must equal the
    original. The ``DateTime64`` column is cast to ``datetime64[ns]`` before
    comparison (presumably to normalise the dtype returned by the round
    trip -- confirm against pandavro).
    """
    # Integer division gives the same split point as int(n / 2) for row
    # counts, without a float round trip; computed once for both slices.
    half = dataframe.shape[0] // 2
    # Close the temporary file deterministically instead of leaking it.
    with NamedTemporaryFile() as tf:
        pdx.to_avro(tf.name, dataframe[0:half])
        pdx.to_avro(tf.name, dataframe[half:], append=True)
        expect = pdx.from_avro(tf.name)
    expect['DateTime64'] = expect['DateTime64'].astype(
        np.dtype('datetime64[ns]'))
    assert_frame_equal(expect, dataframe)
Exemplo n.º 5
0
def _create_tfrecord(file):
    """Convert the Avro file ``TrainTemp/<file>`` into a TFRecord file.

    Each row becomes one ``tf.train.Example`` with three int64 features:
    ``index`` (one lookup result per entry of ``COLUMNS``), ``value`` (a run
    of ones of the same length) and ``label`` (the row's ``click`` field).
    The output is written under ``TrainData/``.
    """
    frame = pdx.from_avro("TrainTemp/" + file)
    # NOTE(review): str.replace swaps every "avro" occurrence in the name,
    # not only a trailing extension.
    target = "TrainData/" + file.replace("avro", "tfrecords")

    with tf.python_io.TFRecordWriter(target) as writer:
        for _, record in frame.iterrows():
            # Map each configured column's cell value through its lookup.
            positions = [lookup[record[column]] for column, lookup in COLUMNS.items()]
            ones = np.full(len(positions), 1)
            click = record.click

            example = tf.train.Example()
            features = example.features.feature
            features["index"].int64_list.value.extend(positions)
            features["value"].int64_list.value.extend(ones)
            features["label"].int64_list.value.append(click)
            writer.write(example.SerializeToString())
Exemplo n.º 6
0
def test_delegation(dataframe):
    """Write ``dataframe`` to Avro by file path and read it back unchanged."""
    # Close the temporary file deterministically instead of leaking the
    # handle until garbage collection.
    with NamedTemporaryFile() as tf:
        pdx.to_avro(tf.name, dataframe)
        expect = pdx.from_avro(tf.name)
    assert_frame_equal(expect, dataframe)
Exemplo n.º 7
0
def test_file_path_e2e(dataframe):
    """Round-trip ``dataframe`` through an Avro file addressed by its path."""
    # The context manager releases the temporary file even if writing or
    # reading raises, rather than leaving the handle open.
    with NamedTemporaryFile() as tf:
        pdx.to_avro(tf.name, dataframe)
        expect = pdx.from_avro(tf.name)
    assert_frame_equal(expect, dataframe)
Exemplo n.º 8
0
def main():
    """Load ``weather.avro``, print the frame, and write it to ``weather_out.avro``."""
    frame = pdx.from_avro('weather.avro')
    print(frame)
    pdx.to_avro('weather_out.avro', frame)
Exemplo n.º 9
0
 def _load_avro(dirs):
     """Open ``dirs`` through ``gcsio.GcsIO`` in binary mode and decode it as Avro."""
     with gcsio.GcsIO().open(dirs, 'rb') as stream:
         frame = pdx.from_avro(stream)
     return frame
Exemplo n.º 10
0
def deserialize_avro_str_to_pandas(avro_str: str, schema: dict = None) -> pd.DataFrame:
    """Decode a base64-encoded Avro payload into a DataFrame.

    Args:
        avro_str: Base64 text whose decoded bytes are Avro data.
        schema: Passed through as the second positional argument of
            ``pandavro.from_avro``; defaults to ``None``.

    Returns:
        The frame produced by ``pandavro.from_avro``.
    """
    raw_bytes = base64.b64decode(avro_str)
    buffer = io.BytesIO(raw_bytes)
    return pandavro.from_avro(buffer, schema)
Exemplo n.º 11
0
    {
        "name": "유재석",
        "birth": "1972-08-14",
        "job": "MC, 개그맨"
    },
    {
        "name": "강호동",
        "birth": "1970-05-11",
        "job": "MC, 개그맨"
    },
    {
        "name": "김구라",
        "birth": "1970-10-03",
        "job": "MC, 개그맨"
    },
]
# Show the Python type of `member` (its definition starts above this excerpt).
print(type(member))
print()

# Build a DataFrame from the records and display it
df1 = pd.DataFrame.from_records(member)
print(df1)
print()

# Write Avro
pandavro.to_avro("./data/csv/member.avro", df1)

# Read Avro back from the same path and display the result
df2 = pandavro.from_avro("./data/csv/member.avro")
print(df2)