def test_save(self):
    """Check that saving an XArray leaves a ``_SUCCESS`` file under the target path."""
    arr = XArray([1, 2, 3])
    target = '{}/tmp/array-csv'.format(hdfs_prefix)
    arr.save(target)
    # The test treats the presence of the marker file as proof the save finished.
    marker = os.path.join(target, '_SUCCESS')
    marker_exists = fileio.is_file(marker)
    self.assertTrue(marker_exists)
    fileio.delete(target)
def save_as_csv(self, path, **params):
    """
    Saves the RDD to a file in CSV format, one element per row.

    Parameters
    ----------
    path : str
        Destination file.  ``fileio.delete`` is called on it before writing.
    **params
        Formatting options passed to ``csv.writer`` (for example
        ``delimiter`` or ``quoting``).
    """
    self._entry(path=path)

    def to_csv(row):
        # Format one element as a single-column CSV line.
        sio = StringIO.StringIO()
        # Formatting options belong to the writer.  writerow() accepts only
        # the row itself, so forwarding **params to it raised TypeError
        # whenever the caller supplied any option.
        writer = csv.writer(sio, **params)
        try:
            writer.writerow([row])
            return sio.getvalue()
        except IOError:
            # Best effort: an element that cannot be written adds no output.
            return ''

    fileio.delete(path)
    with fileio.open_file(path, 'w') as f:
        self.begin_iterator()
        elems_at_a_time = 10000
        while True:
            ret = self.iterator_get_next(elems_at_a_time)
            for row in ret:
                f.write(to_csv(row))
            # A short batch means the iterator has been exhausted.
            if len(ret) != elems_at_a_time:
                break
def test_save_format(self):
    """Save an XArray as CSV and check the file's first three lines, in order."""
    arr = XArray([1, 2, 3])
    target = '{}/tmp/array-csv'.format(hdfs_prefix)
    arr.save(target, format='csv')
    with fileio.open_file(target) as csv_file:
        for expected in ('1', '2', '3'):
            line = csv_file.readline()
            self.assertEqual(expected, line.strip())
    fileio.delete(target)
def test_save(self):
    """Save an XFrame in binary format and check the pickled ``_metadata`` file."""
    frame = XFrame({'id': [30, 20, 10], 'val': ['a', 'b', 'c']})
    target = '{}/tmp/frame'.format(hdfs_prefix)
    frame.save(target, format='binary')
    metadata_path = os.path.join(target, '_metadata')
    with fileio.open_file(metadata_path) as metadata_file:
        metadata = pickle.load(metadata_file)
    # The metadata is expected to hold the column names followed by the column types.
    expected = [['id', 'val'], [int, str]]
    self.assertListEqual(expected, metadata)
    # TODO find some way to check the data
    fileio.delete(target)
def test_save(self):
    """Save an XFrame as CSV and check the header line and the three data lines."""
    frame = XFrame({'id': [30, 20, 10], 'val': ['a', 'b', 'c']})
    target = '{}/tmp/frame-csv'.format(hdfs_prefix)
    frame.save(target, format='csv')
    # The test reads the output back from the target path plus a '.csv' suffix.
    csv_path = target + '.csv'
    with fileio.open_file(csv_path) as csv_file:
        for expected in ('id,val', '30,a', '20,b', '10,c'):
            line = csv_file.readline()
            self.assertEqual(expected, line.rstrip())
    fileio.delete(csv_path)
def test_read_parquet_str(self):
    """Round-trip an XFrame with int and str columns through parquet.

    Saves the frame, reads it back from the ``.parquet`` path, and checks
    the row count, column names, column types and row contents.
    """
    t = XFrame({'id': [1, 2, 3], 'val': ['a', 'b', 'c']})
    path = '{}/tmp/frame-parquet'.format(hdfs_prefix)
    t.save(path, format='parquet')
    parquet_path = path + '.parquet'
    res = XFrame(parquet_path)
    # results may not come back in the same order
    res = res.sort('id')
    self.assertEqualLen(3, res)
    self.assertListEqual(['id', 'val'], res.column_names())
    self.assertListEqual([int, str], res.column_types())
    self.assertDictEqual({'id': 1, 'val': 'a'}, res[0])
    self.assertDictEqual({'id': 2, 'val': 'b'}, res[1])
    self.assertDictEqual({'id': 3, 'val': 'c'}, res[2])
    # The data is read back from path + '.parquet', so that is the path to
    # clean up; deleting the bare path left the written file behind.
    fileio.delete(parquet_path)
def save_as_text(self, path):
    """
    Saves the RDD to file as text.

    After the data is written, the element type is pickled to
    ``<path>/_metadata`` and the lineage is saved to ``<path>/_lineage``.

    Raises
    ------
    TypeError
        If writing the RDD fails.
    """
    self._entry(path=path)
    fileio.delete(path)
    try:
        self._rdd.saveAsTextFile(path)
    except Exception:
        # Exception rather than a bare except, so KeyboardInterrupt and
        # SystemExit propagate instead of being reported as a failed save.
        # TODO distinguish between filesystem errors and pickle errors
        raise TypeError('The XArray save failed.')
    metadata = self.elem_type
    metadata_path = os.path.join(path, '_metadata')
    with fileio.open_file(metadata_path, 'w') as f:
        # TODO detect filesystem errors
        pickle.dump(metadata, f)

    lineage_path = os.path.join(path, '_lineage')
    self.lineage.save(lineage_path)
def save(self, path):
    """
    Saves the RDD to file in pickled form.

    After the data is written, the element type is pickled to
    ``<path>/_metadata`` and the lineage is saved to ``<path>/_lineage``.

    Raises
    ------
    TypeError
        If writing the RDD fails.
    """
    self._entry(path=path)
    # this only works for local files
    fileio.delete(path)
    try:
        self._rdd.saveAsPickleFile(path)        # action ?
    except Exception:
        # Exception rather than a bare except, so KeyboardInterrupt and
        # SystemExit propagate instead of being reported as a failed save.
        # TODO distinguish between filesystem errors and pickle errors
        raise TypeError('The XArray save failed.')
    metadata = self.elem_type
    metadata_path = os.path.join(path, '_metadata')
    with fileio.open_file(metadata_path, 'w') as f:
        # TODO detect filesystem errors
        pickle.dump(metadata, f)

    lineage_path = os.path.join(path, '_lineage')
    self.lineage.save(lineage_path)
def test_save(self):
    """Save an XFrame in parquet format, then remove the output.

    The written data is not inspected yet; the test only confirms the
    save call completes.
    """
    frame = XFrame({'id': [30, 20, 10], 'val': ['a', 'b', 'c']})
    target = '{}/tmp/frame-parquet'.format(hdfs_prefix)
    frame.save(target, format='parquet')
    # TODO verify
    parquet_path = target + '.parquet'
    fileio.delete(parquet_path)