Пример #1
0
 def test_get_df_success_with_pickle_type(self):
     """Upload a pickled DataFrame and check get_df reads it back.

     The frame built from ``self.data`` is pickled, stored through
     ``self.client.put_object`` and fetched with
     ``get_df(..., format='pickle')``. Only the column labels and the
     first row are compared.
     """
     object_key = MY_PREFIX + '/key1.pickle'
     expected = pandas.DataFrame.from_dict(self.data)
     payload = pickle.dumps(expected)
     self.client.put_object(Body=payload, Bucket=MY_BUCKET, Key=object_key)

     loaded = get_df(self.client, MY_BUCKET, object_key, format='pickle')

     expected_columns = list(expected.columns)
     loaded_columns = list(loaded.columns)
     self.assertSequenceEqual(expected_columns, loaded_columns)

     expected_first_row = expected.iloc[0].tolist()
     loaded_first_row = loaded.iloc[0].tolist()
     self.assertSequenceEqual(expected_first_row, loaded_first_row)
Пример #2
0
 def test_get_df_success_with_parquet_type(self):
     """Upload a parquet-encoded DataFrame and check get_df reads it back.

     The frame built from ``self.data`` is serialized to an in-memory
     buffer with the pyarrow engine, stored through
     ``self.client.put_object`` and fetched with
     ``get_df(..., format='parquet')``. Only the column labels and the
     first row are compared.
     """
     object_key = MY_PREFIX + '/key1.parquet'
     expected = pandas.DataFrame.from_dict(self.data)
     stream = io.BytesIO()
     expected.to_parquet(stream, engine='pyarrow')
     self.client.put_object(
         Bucket=MY_BUCKET,
         Key=object_key,
         Body=stream.getvalue(),
     )

     loaded = get_df(self.client, MY_BUCKET, object_key, format='parquet')

     loaded_columns = list(loaded.columns)
     expected_columns = list(expected.columns)
     self.assertSequenceEqual(loaded_columns, expected_columns)

     loaded_first_row = loaded.iloc[0].tolist()
     expected_first_row = expected.iloc[0].tolist()
     self.assertSequenceEqual(loaded_first_row, expected_first_row)
Пример #3
0
 def test_get_df_success_with_csv_type(self):
     """Upload a CSV-encoded DataFrame and check get_df reads it back.

     The frame built from ``self.data`` is written as CSV text to an
     in-memory buffer (with ``index_label=False``), stored through
     ``self.client.put_object`` and fetched with
     ``get_df(..., format='csv')``. Only the column labels and the first
     row are compared.
     """
     object_key = MY_PREFIX + '/key1.csv'
     expected = pandas.DataFrame.from_dict(self.data)
     text_stream = io.StringIO()
     expected.to_csv(text_stream, index_label=False)
     self.client.put_object(
         Bucket=MY_BUCKET,
         Key=object_key,
         Body=text_stream.getvalue(),
     )

     loaded = get_df(self.client, MY_BUCKET, object_key, format='csv')

     loaded_columns = list(loaded.columns)
     expected_columns = list(expected.columns)
     self.assertSequenceEqual(loaded_columns, expected_columns)

     loaded_first_row = loaded.iloc[0].tolist()
     expected_first_row = expected.iloc[0].tolist()
     self.assertSequenceEqual(loaded_first_row, expected_first_row)
Пример #4
0
 def test_get_df_success_with_xlsx_type(self):
     """Upload an xlsx workbook and check get_df reads it back.

     The frame built from ``self.data`` is written to an in-memory buffer
     with the xlsxwriter engine, stored through ``self.client.put_object``
     and fetched with ``get_df(..., format='xlsx')``. Only the column
     labels and the first row are compared.
     """
     buffer = io.BytesIO()
     df = pandas.DataFrame.from_dict(self.data)
     # Use the writer as a context manager: leaving the block finalizes the
     # workbook into ``buffer``. ``ExcelWriter.save()`` was deprecated in
     # pandas 1.5 and removed in 2.0, so calling it fails on current pandas.
     with pandas.ExcelWriter(buffer, engine='xlsxwriter') as writer:
         df.to_excel(writer, sheet_name='Sheet1', index=False)
     self.client.put_object(Bucket=MY_BUCKET,
                            Key=MY_PREFIX + '/key1.xlsx',
                            Body=buffer.getvalue())
     o = get_df(self.client,
                MY_BUCKET,
                MY_PREFIX + '/key1.xlsx',
                format='xlsx')
     self.assertSequenceEqual(list(o.columns), list(df.columns))
     self.assertSequenceEqual(o.iloc[0].tolist(), df.iloc[0].tolist())
Пример #5
0
 def test_get_df_failure(self):
     """Expect ClientError when get_df is asked for a key never uploaded.

     An object is stored under ``MY_PREFIX + '/key1'``; the lookup then
     targets ``'unknown_key'``, which this test did not store.
     """
     stored_key = MY_PREFIX + '/key1'
     self.client.put_object(
         Bucket=MY_BUCKET,
         Key=stored_key,
         Body='awesome body',
     )

     self.assertRaises(
         ClientError,
         get_df,
         self.client,
         MY_BUCKET,
         'unknown_key',
         format='pickle',
     )
Пример #6
0
 def test_get_df_success_unknown_type(self):
     """Expect get_df to raise AssertionError for the 'txt' format."""
     object_key = MY_PREFIX + '/key'
     self.assertRaises(
         AssertionError,
         get_df,
         self.client,
         MY_BUCKET,
         object_key,
         format='txt',
     )