def testFindDataset(self):
    """Check findDataset / uncompressAndCopyDataset against a scratch file.

    The test expects that:
      * looking up a missing dataset (relative or absolute path) raises;
      * a bare dataset name resolves to the absolute path of the file
        created under the local 'data' directory;
      * an absolute path to an existing file is returned unchanged;
      * when only the '.gz' variant exists, findDataset returns the
        compressed path and uncompressAndCopyDataset produces the
        uncompressed file next to it.

    Scratch files are created under ./data and removed at the end of the
    test.
    """
    import gzip

    # Test non-existing dataset (relative path)
    with self.assertRaises(Exception):
        findDataset('no_such_dataset.csv')

    # Test non-existing dataset (absolute path)
    with self.assertRaises(Exception):
        findDataset('/no_such_dataset.csv')

    # Test existing dataset (relative path)
    if not os.path.isdir('data'):
        os.makedirs('data')
    datasetPath = 'test_find_dataset.csv'
    filename = 'data/test_find_dataset.csv'  # This is the uncompressed name.
    fullPath = os.path.abspath(filename)

    # Start from a clean slate in case an earlier run left files behind.
    if os.path.exists(fullPath):
        os.remove(fullPath)
    fullPathCompressed = fullPath + ".gz"
    if os.path.exists(fullPathCompressed):
        os.remove(fullPathCompressed)

    # Create the "dataset". The handle is closed explicitly so the content
    # is flushed to disk before the functions under test look at the file.
    with open(filename, 'w') as datasetFile:
        datasetFile.write('123')

    path = findDataset(datasetPath)
    self.assertEqual(path, fullPath)
    self.assertTrue(os.path.exists(path))

    # This should do nothing, since the file is already uncompressed
    path = uncompressAndCopyDataset(path)
    self.assertEqual(path, fullPath)

    # Test existing dataset (absolute path)
    self.assertEqual(findDataset(fullPath), fullPath)

    # Test existing dataset (compressed path)

    # Create the compressed file. GzipFile is a binary stream, so the
    # payload must be bytes (a plain str raises TypeError on Python 3).
    with gzip.GzipFile(fullPathCompressed, 'w') as compressedFile:
        compressedFile.write(b"1,2,3\n")
    self.assertTrue(os.path.isfile(fullPathCompressed))

    # Remove the original file so only the compressed variant remains
    os.remove(fullPath)

    self.assertEqual(findDataset(datasetPath), fullPathCompressed)

    # This should put the uncompressed file in the same directory
    path = uncompressAndCopyDataset(fullPathCompressed)
    self.assertEqual(path, fullPath)
    self.assertTrue(os.path.isfile(path))

    # Clean up the scratch files
    os.remove(fullPath)
    os.remove(fullPathCompressed)
def uncompressAndCopyAllDatasets(datasets, destDir, overwrite):
    """Run uncompressAndCopyDataset over every entry of a dataset mapping.

    Each value in ``datasets`` is passed to uncompressAndCopyDataset together
    with ``destDir`` and ``overwrite``; the results are collected in a new
    dict under the same keys. ``datasets`` itself is not modified.

    Per the original description: if ``destDir`` is not None, all datasets
    are uncompressed into that directory, otherwise they are uncompressed in
    place; data that is not compressed is simply copied (when ``destDir`` is
    not None).
    """
    return {
        name: uncompressAndCopyDataset(datasets[name], destDir, overwrite)
        for name in datasets
    }