def test_get_df_success_with_pickle_type(self):
    """Store a pickled DataFrame and read it back through ``get_df``.

    The frame built from ``self.data`` is uploaded as pickle bytes; the
    frame returned by ``get_df(..., format='pickle')`` must expose the
    same column labels and the same first row.
    """
    key = MY_PREFIX + '/key1.pickle'
    expected = pandas.DataFrame.from_dict(self.data)
    payload = pickle.dumps(expected)
    self.client.put_object(Bucket=MY_BUCKET, Key=key, Body=payload)

    actual = get_df(self.client, MY_BUCKET, key, format='pickle')

    expected_columns = list(expected.columns)
    actual_columns = list(actual.columns)
    self.assertSequenceEqual(expected_columns, actual_columns)

    # Only the first row is compared, not the whole frame.
    expected_row = expected.iloc[0].tolist()
    actual_row = actual.iloc[0].tolist()
    self.assertSequenceEqual(expected_row, actual_row)
def test_get_df_success_with_parquet_type(self):
    """Store a parquet-encoded DataFrame and read it back through ``get_df``.

    The frame built from ``self.data`` is serialised to an in-memory
    buffer with the pyarrow engine and uploaded; the frame returned by
    ``get_df(..., format='parquet')`` must have the same column labels
    and the same first row.
    """
    key = MY_PREFIX + '/key1.parquet'
    expected = pandas.DataFrame.from_dict(self.data)

    parquet_bytes = io.BytesIO()
    expected.to_parquet(parquet_bytes, engine='pyarrow')
    self.client.put_object(
        Bucket=MY_BUCKET,
        Key=key,
        Body=parquet_bytes.getvalue(),
    )

    actual = get_df(self.client, MY_BUCKET, key, format='parquet')

    self.assertSequenceEqual(list(actual.columns), list(expected.columns))
    # Only the first row is compared, not the whole frame.
    self.assertSequenceEqual(
        actual.iloc[0].tolist(),
        expected.iloc[0].tolist(),
    )
def test_get_df_success_with_csv_type(self):
    """Store a CSV-encoded DataFrame and read it back through ``get_df``.

    The frame built from ``self.data`` is written as CSV text to an
    in-memory buffer (with ``index_label=False``) and uploaded; the frame
    returned by ``get_df(..., format='csv')`` must have the same column
    labels and the same first row.
    """
    key = MY_PREFIX + '/key1.csv'
    expected = pandas.DataFrame.from_dict(self.data)

    csv_text = io.StringIO()
    expected.to_csv(csv_text, index_label=False)
    self.client.put_object(
        Bucket=MY_BUCKET,
        Key=key,
        Body=csv_text.getvalue(),
    )

    actual = get_df(self.client, MY_BUCKET, key, format='csv')

    self.assertSequenceEqual(list(actual.columns), list(expected.columns))
    # Only the first row is compared, not the whole frame.
    self.assertSequenceEqual(
        actual.iloc[0].tolist(),
        expected.iloc[0].tolist(),
    )
def test_get_df_success_with_xlsx_type(self):
    """Store an xlsx-encoded DataFrame and read it back through ``get_df``.

    The frame built from ``self.data`` is written to an in-memory
    workbook (sheet ``Sheet1``, no index column) with the xlsxwriter
    engine and uploaded; the frame returned by
    ``get_df(..., format='xlsx')`` must have the same column labels and
    the same first row.
    """
    key = MY_PREFIX + '/key1.xlsx'
    df = pandas.DataFrame.from_dict(self.data)

    buffer = io.BytesIO()
    # ``ExcelWriter.save()`` was deprecated in pandas 1.5 and removed in
    # 2.0. The context manager finalises the workbook on exit and works
    # on both old and new pandas versions.
    with pandas.ExcelWriter(buffer, engine='xlsxwriter') as writer:
        df.to_excel(writer, sheet_name='Sheet1', index=False)

    self.client.put_object(Bucket=MY_BUCKET, Key=key, Body=buffer.getvalue())

    o = get_df(self.client, MY_BUCKET, key, format='xlsx')

    self.assertSequenceEqual(list(o.columns), list(df.columns))
    # Only the first row is compared, not the whole frame.
    self.assertSequenceEqual(o.iloc[0].tolist(), df.iloc[0].tolist())
def test_get_df_failure(self):
    """Requesting a key that was never uploaded must raise ``ClientError``.

    An object is first stored under ``MY_PREFIX + '/key1'``; ``get_df`` is
    then called with the different key ``'unknown_key'``.
    """
    stored_key = MY_PREFIX + '/key1'
    self.client.put_object(Body='awesome body', Key=stored_key, Bucket=MY_BUCKET)

    self.assertRaises(
        ClientError,
        get_df,
        self.client,
        MY_BUCKET,
        'unknown_key',
        format='pickle',
    )
def test_get_df_success_unknown_type(self):
    """Calling ``get_df`` with ``format='txt'`` must raise ``AssertionError``.

    NOTE(review): despite "success" in the method name, this test asserts
    a failure path; the name is kept so existing test IDs do not change.
    """
    self.assertRaises(
        AssertionError,
        get_df,
        self.client,
        MY_BUCKET,
        MY_PREFIX + '/key',
        format='txt',
    )