def test_file_exists(self):
    """Ensure an error is raised when a file exists.

    Creates a file inside a temporary directory, then checks that opening
    the same path with ``atomic_write`` and writing to it raises.
    """
    with TemporaryDirectory() as tmp:
        fp = os.path.join(tmp, "asdf.txt")

        # Only the file's existence matters for this test, so create it and
        # close the handle immediately. Leaving it open leaks a descriptor
        # and can prevent the temporary directory from being removed on
        # platforms that lock open files.
        with open(fp, "w+"):
            pass

        # NOTE(review): Exception is very broad; narrow it to the specific
        # type atomic_write raises for an existing target once confirmed.
        with self.assertRaises(Exception):
            with atomic_write(fp) as f:
                f.write("exist")
def test_atomic_failure(self):
    """Ensure that file does not exist after failure during write.

    Raises ``FakeFileFailure`` inside an ``atomic_write`` block and checks
    that the exception propagates and that neither the path reported by
    the handle's ``name`` nor the target path exists afterwards.
    """
    with TemporaryDirectory() as tmp:
        fp = os.path.join(tmp, "asdf.txt")

        with self.assertRaises(FakeFileFailure):
            with atomic_write(fp, "w") as f:
                tmpfile = f.name
                # unittest assertions are used instead of bare ``assert``
                # so the checks still run under ``python -O``.
                self.assertTrue(os.path.exists(tmpfile))
                raise FakeFileFailure()

        # After the failure, nothing should be left behind.
        self.assertFalse(os.path.exists(tmpfile))
        self.assertFalse(os.path.exists(fp))
def test_atomic_write(self):
    """Ensure file exists after being written successfully.

    While the ``atomic_write`` block is open the target path must not
    exist yet. Once the block exits, the path reported by the handle's
    ``name`` must be gone and the target must hold the written text.
    """
    with TemporaryDirectory() as tmp:
        fp = os.path.join(tmp, "asdf.txt")

        with atomic_write(fp, "w") as f:
            # unittest assertions are used instead of bare ``assert`` so
            # the checks still run under ``python -O``.
            self.assertFalse(os.path.exists(fp))
            tmpfile = f.name
            f.write("asdf")

        self.assertFalse(os.path.exists(tmpfile))
        self.assertTrue(os.path.exists(fp))

        # Use a separate name for the read handle rather than rebinding
        # the write handle ``f``.
        with open(fp) as written:
            self.assertEqual(written.read(), "asdf")
# Print the id computed by get_user_id for each listed user name.
for user in ["gorlins", "frankiekienlam"]:
    print("Id for {}: {}".format(user, get_user_id(user)))

data_source = "data/hashed.xlsx"
parquet_path = "hashed.parquet"
id_list_path = "hashed_id_list.txt"

# Parquet question: read the Excel data in, save it as a new parquet file,
# read back just the id column, and print it.
from pset_1.io import atomic_write
import pandas as pd
import pyarrow as pr
# ``import pyarrow`` alone does not load the ``parquet`` submodule;
# ``pr.parquet`` only resolves if some other code imported it first.
# Import it explicitly so the lookup does not depend on that side effect.
import pyarrow.parquet as pq

# Transform the Excel file into a .parquet file with the same content.
# ``engine`` is passed by keyword: pandas has deprecated passing
# ``to_parquet`` arguments other than ``path`` positionally.
df = pd.read_excel(data_source, index_col=0)
df.to_parquet(parquet_path, engine="pyarrow")

# Read back only the hashed_id column and convert it to plain Python
# values. Calling ``list()`` on the table yields column objects, whose repr
# is not the list of ids.
table = pq.read_table(parquet_path, columns=["hashed_id"])
hash_id_list = table.column("hashed_id").to_pylist()
hash_id_list_str = str(hash_id_list)

# Write the list of hashed ids using atomic_write, then read the file back
# and print it.
# NOTE(review): if atomic_write refuses to overwrite an existing file,
# re-running this script will fail here until the old output is removed.
# Confirm the intended behaviour.
with atomic_write(id_list_path) as f:
    f.write(hash_id_list_str)

with open(id_list_path) as f:
    lines = f.read()
    print(lines)