def test_should_download_zipped_csv(self):
    os = OSFS("./tests/test_integration/resources/")
    file_name = "test_csv_zipped"
    test_zip_file = 'http://localhost:8001/local_data/base_train.zip'

    test_ds_zip = DataSet(os, file_name, "test_id", test_zip_file, "test dataset", "zip")
    test_ds_zip.download()
    test_ds_zip.unzip_file()

    df = pd.read_csv(test_ds_zip.uri)
    self.assertEqual((2, 2), df.shape)

    os.remove(file_name + "/train.csv")
    os.removedir(file_name)

    ## only download
    os = OSFS("./tests/test_integration/resources/")
    file_name = "train.csv"
    test_file = 'http://localhost:8001/local_data/train.csv'

    test_ds = DataSet(os, file_name, "test_id", test_file, "test dataset")
    test_ds.download()
    test_ds.unzip_file()

    df = pd.read_csv(test_ds.uri)
    self.assertEqual((2, 2), df.shape)

    os.remove(file_name)
def create_production(build_dir, backups, script_dir):
    """Put the staging version to production hosted at register.geostandaarden.nl"""
    print "Building production..."
    logging.info("Building production...")

    deploy = OSFS('..')

    if not deploy.exists(backups):
        deploy.makedir(backups)

    deploy.copydir('%s/%s' % (script_dir, build_dir), 'register-new', overwrite=True)

    if deploy.exists('register'):
        # server refuses to recursively remove register/staging
        # hence we explicitly remove symbolic link to staging
        try:
            deploy.remove('register/staging/staging')
        except ResourceNotFoundError:
            print "Warning, register/staging/staging not found..."
        try:
            deploy.removedir('register/staging')
        except ResourceNotFoundError:
            print "Warning, register/staging not found..."

        backup_dir = time.strftime('%Y-%m-%d-%H-%M-%S')
        # if deploy.exists('backups/%s' % backup_dir):
        #     deploy.removedir('backups/%s' % backup_dir, force=True)
        deploy.copydir('register', 'backups/%s' % backup_dir, overwrite=True)

        try:
            deploy.movedir('register', 'register-old', overwrite=True)
        except ResourceNotFoundError:
            pass

    deploy.movedir('register-new', 'register', overwrite=True)

    # create symbolic link to standalone staging directory
    # fails if production is built first...
    deploy.makedir('register/staging')
    call('cd ../register/staging; ln -s ../../staging', shell=True)
    call('cd ../register; ln -s ../%s/log.txt' % script_dir, shell=True)

    try:
        deploy.removedir('register-old', force=True)
    except ResourceNotFoundError:
        pass

    call('chmod -R a+rx ../register', shell=True)

    print "Done building production..."
    logging.info("Production built successfully!")
def test_unzip_local_data(self):
    os = OSFS(".")
    os_remove = os.remove
    os.remove = mock.Mock(return_value=None)

    os.copy("./tests/resources/local_data/base_train.zip",
            "./tests/resources/local_data/train.zip")

    test_local = DataSet(os, "/local/path", "train",
                         "./tests/resources/local_data/train.zip",
                         "test dataset", "zip")
    test_local.unzip_file()

    result = os.exists("./tests/resources/local_data/train/train.csv")

    os.remove = os_remove
    os.remove("./tests/resources/local_data/train/train.csv")
    os.remove("./tests/resources/local_data/train.zip")
    os.removedir("./tests/resources/local_data/train")

    self.assertTrue(result)
def create_production(destination, backups, script_entry_path, production_path):
    """Put the staging version to production hosted at register.geostandaarden.nl"""
    ## TODO: feed this function absolute paths
    print "Building production..."
    logging.info("Building production...")

    production = OSFS(production_path)
    # if production.exists(backups) == False:
    #     production.makedir(backups)

    # copy newly baked register/staging to production directory
    # NOTE: only build paths within script_dir are currently supported
    call('cp -r %s %s' % (ospath.join(build_path, destination),
                          ospath.join(production_path, destination + '-new')), shell=True)
    # production.copydir('%s/%s/%s' % (script_dir, build_path, destination), destination + '-new', overwrite=True)

    if production.exists(destination):
        # server refuses to recursively remove register/staging
        # hence we explicitly remove symbolic link to staging
        try:
            production.remove('%s/staging/staging' % destination)
        except ResourceNotFoundError:
            print "Warning, %s/staging/staging not found..." % destination
        try:
            production.removedir('%s/staging' % destination)
        except ResourceNotFoundError:
            print "Warning, %s/staging not found..." % destination

        backup_dir = time.strftime('%Y-%m-%d-%H-%M-%S')
        # if production.exists('backups/%s' % backup_dir):
        #     production.removedir('backups/%s' % backup_dir, force=True)
        production.copydir(destination, '%s/%s' % (backups, backup_dir), overwrite=True)

        try:
            production.movedir(destination, destination + '-old', overwrite=True)
        except ResourceNotFoundError:
            pass

    production.movedir(destination + '-new', destination, overwrite=True)

    # create symbolic link to standalone staging directory
    # fails if production is built first...
    production.makedir('%s/staging' % destination)
    call('cd %s; ln -s %s' % (ospath.join(production_path, destination, 'staging'),
                              ospath.join(production_path, 'staging')), shell=True)
    call('cd %s; ln -s %s' % (ospath.join(production_path, destination),
                              ospath.join(script_entry_path, 'log.txt')), shell=True)

    try:
        production.removedir(destination + '-old', force=True)
    except ResourceNotFoundError:
        pass

    call('chmod -R a+rx %s/%s' % (production_path, destination), shell=True)

    print "Done building production..."
    logging.info("Production built successfully!")
class Timeline(object):
    """A timeline is a sequence of timestamped events."""

    def __init__(self, path, name, max_events=None):
        self.path = path
        self.name = name
        self.fs = OSFS(path, create=True)
        self.max_events = max_events

    def __repr__(self):
        return "Timeline({!r}, {!r}, max_events={!r})".format(
            self.path, self.name, self.max_events)

    def new_event(self, event_type, timestamp=None, *args, **kwargs):
        """Create and return an event, to be used as a context manager"""
        if self.max_events is not None:
            size = len(self.fs.listdir(wildcard="*.json"))
            if size >= self.max_events:
                raise TimelineFullError(
                    "The timeline has reached its maximum size")

        if timestamp is None:
            timestamp = int(time() * 1000.0)

        try:
            event_cls = _event_registry[event_type]
        except KeyError:
            raise UnknownEventError("No event type '{}'".format(event_type))

        # Make an event id that we can be confident is unique
        token = str(randint(0, 2**31))
        event_id = "{}_{}_{}".format(event_type, timestamp, token)
        event = event_cls(self, event_id, timestamp, *args, **kwargs)
        log.debug('new event {!r}'.format(event))
        return event

    def new_photo(self, file, filename=None, ext=None, **kwargs):
        """Create a new photo object"""
        event = self.new_event('IMAGE', **kwargs)

        if hasattr(file, 'getvalue'):
            bytes = file.getvalue()
        elif file is not None:
            if isinstance(file, basestring):
                with open(file, 'rb') as f:
                    bytes = f.read()
            else:
                bytes = file.read()
        else:
            raise ValueError("A value for 'file' or 'bytes' is required")

        event.attach_bytes(bytes, name='photo', filename=filename, ext=ext)
        return event

    def get_events(self, sort=True):
        """Get all accumulated events"""
        events = []
        for event_filename in self.fs.listdir(wildcard="*.json"):
            with self.fs.open(event_filename, 'rb') as f:
                event = loads(f.read())
            events.append(event)
        if sort:
            # sort by timestamp
            events.sort(key=itemgetter('timestamp'))
        return events

    def clear_all(self):
        """Clear all stored events"""
        for filename in self.fs.listdir(wildcard="*.json"):
            try:
                self.fs.remove(filename)
            except FSError:
                pass

    def clear_events(self, event_ids):
        """Clear any events that have been processed"""
        for event_id in event_ids:
            filename = "{}.json".format(event_id)
            try:
                self.fs.remove(filename)
            except FSError:
                pass

    def _write_event(self, event_id, event):
        if hasattr(event, 'to_data'):
            event = event.to_data()
        event['event_id'] = event_id
        event_json = dumps(event, indent=4)
        filename = "{}.json".format(event_id)
        with self.fs.open(filename, 'wb') as f:
            f.write(event_json)
class Timeline(object):
    """A timeline is a sequence of timestamped events."""

    def __init__(self, path, name, max_events=None):
        self.path = path
        self.name = name
        self.fs = OSFS(path, create=True)
        self.max_events = max_events

    def __repr__(self):
        return "Timeline({!r}, {!r}, max_events={!r})".format(
            self.path, self.name, self.max_events)

    def new_event(self, event_type, timestamp=None, *args, **kwargs):
        """Create and return an event, to be used as a context manager"""
        if self.max_events is not None:
            size = len(self.fs.listdir(wildcard="*.json"))
            if size >= self.max_events:
                raise TimelineFullError(
                    "The timeline has reached its maximum size")

        if timestamp is None:
            timestamp = int(time() * 1000.0)

        try:
            event_cls = _event_registry[event_type]
        except KeyError:
            raise UnknownEventError("No event type '{}'".format(event_type))

        # Make an event id that we can be confident is unique
        token = str(randint(0, 2 ** 31))
        event_id = kwargs.pop('event_id', None) or "{}_{}_{}".format(event_type, timestamp, token)
        event = event_cls(self, event_id, timestamp, *args, **kwargs)
        log.debug('new event {!r}'.format(event))
        return event

    def new_photo(self, file, filename=None, ext=None, **kwargs):
        """Create a new photo object"""
        event = self.new_event('IMAGE', **kwargs)

        if hasattr(file, 'getvalue'):
            bytes = file.getvalue()
        elif file is not None:
            if isinstance(file, text_type):
                with open(file, 'rb') as f:
                    bytes = f.read()
            else:
                bytes = file.read()
        else:
            raise ValueError("A value for 'file' or 'bytes' is required")

        event.attach_bytes(bytes, name='photo', filename=filename, ext=ext)
        return event

    def get_events(self, sort=True):
        """Get all accumulated events"""
        events = []
        for event_filename in self.fs.listdir(wildcard="*.json"):
            with self.fs.open(event_filename, 'rb') as f:
                event = loads(f.read().decode('utf-8'))
            events.append(event)
        if sort:
            # sort by timestamp
            events.sort(key=itemgetter('timestamp'))
        return events

    def clear_all(self):
        """Clear all stored events"""
        for filename in self.fs.listdir(wildcard="*.json"):
            try:
                self.fs.remove(filename)
            except FSError:
                pass

    def clear_events(self, event_ids):
        """Clear any events that have been processed"""
        for event_id in event_ids:
            filename = "{}.json".format(event_id)
            try:
                self.fs.remove(filename)
            except FSError:
                pass

    def _write_event(self, event_id, event):
        if hasattr(event, 'to_data'):
            event = event.to_data()
        event['event_id'] = event_id
        event_json = dumps(event, indent=4).encode('utf-8')
        filename = "{}.json".format(event_id)
        with self.fs.open(filename, 'wb') as f:
            f.write(event_json)
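# Hedged usage sketch (not part of the original source): it assumes an 'IMAGE'
# event type is registered in _event_registry, that the object returned by
# new_event()/new_photo() supports the context-manager protocol mentioned in the
# new_event docstring, and that _write_event runs when an event is committed.
# The path and filename below are made up for illustration only.
#
#     timeline = Timeline('/tmp/example-timeline', 'camera', max_events=100)
#     event = timeline.new_photo(open('snapshot.jpg', 'rb'), ext='jpg')
#     events = timeline.get_events()   # dicts loaded from *.json, sorted by 'timestamp'
#     timeline.clear_events([e['event_id'] for e in events])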
class TestDatasetManager(unittest.TestCase):

    trash_dir = "./tests/resources/trash_data"

    def setUp(self):
        self.os = OSFS(".")

    def tearDown(self):
        for data in self.os.listdir(self.trash_dir):
            if data != ".keep":
                self.os.remove("{}/{}".format(self.trash_dir, data))
        self.os.close()

    def test_should_read_yaml_from_dir(self):
        expected = {
            "one_test": {
                "source": "http://source/teste",
                "description": "my little dataset"
            }
        }
        data = DatasetManager("./tests/resources/one_data")
        self.assertDictEqual(data.get_datasets(), expected)

    def test_should_read_multiple_yaml_from_dir(self):
        expected = {
            "one_test": {
                "source": "https://raw.githubusercontent.com/pcsanwald/kaggle-titanic/master/train.csv",
                "description": "my little dataset"
            },
            "two_test": {
                "source": "https://raw.githubusercontent.com/pcsanwald/kaggle-titanic/master/train.csv",
                "description": "my little dataset 2"
            }
        }
        data = DatasetManager("./tests/resources/multiple_data", fs=self.os)
        result = list(data.get_datasets().keys())
        result.sort()
        expected = ["one_test", "two_test"]
        self.assertListEqual(expected, result)

    def test_should_get_dataset(self):
        data = DatasetManager("./tests/resources/local_data")
        dataset = {
            "local_test": {
                "source": "./tests/resources/local_data/train.csv",
                "description": "my little dataset local"
            }
        }
        self.assertDictEqual(data.get_dataset("local_test"), dataset.get("local_test"))

    def test_should_get_dataset_unknown(self):
        data = DatasetManager("./tests/resources/local_data")
        with self.assertRaises(IOError):
            data.get_dataset("unknown_test")

    def test_should_create_dataset(self):
        data = DatasetManager(self.trash_dir, fs=self.os)
        identifier = "data_name"
        dataset = {
            "identifier": identifier,
            "description": "description",
            "source": "/tmp/test.csv",
        }
        data.create_dataset(**dataset)

        loaded_datasets = data.get_datasets()
        dataset_config = loaded_datasets.get(identifier)

        self.assertTrue(
            self.os.isfile("{}/{}.yaml".format(self.trash_dir, identifier)))
        self.assertEqual(len(self.os.listdir(self.trash_dir)), 2)
        self.assertEqual(list(loaded_datasets.keys())[0], identifier)
        self.assertEqual(dataset_config.get("description"), dataset["description"])
        self.assertEqual(dataset_config.get("source"), dataset["source"])

    def test_should_create_dataset_with_custom_data(self):
        data = DatasetManager(self.trash_dir, fs=self.os)
        identifier = "data_name_custom"
        dataset = {
            "identifier": identifier,
            "description": "description",
            "source": "/tmp/test.csv"
        }
        data.create_dataset(**dataset)

        self.assertTrue(
            self.os.isfile("{}/{}.yaml".format(self.trash_dir, identifier)))
        self.assertEqual(len(os.listdir(self.trash_dir)), 2)

        loaded_dataset = data.get_datasets()
        self.assertEqual(list(loaded_dataset.keys()), [identifier])

        datasource_configs = loaded_dataset.get(identifier)
        self.assertEqual(datasource_configs["description"], dataset["description"])
        self.assertEqual(datasource_configs["source"], dataset["source"])

    def test_should_remove_dataset(self):
        data = DatasetManager(self.trash_dir, fs=self.os)
        identifier = "data_name"
        dataset = {
            "identifier": identifier,
            "description": "description",
            "source": "/tmp/test.csv"
        }
        data.create_dataset(**dataset)

        self.assertTrue(
            os.path.isfile("{}/{}.yaml".format(self.trash_dir, identifier)))
        self.assertEqual(len(os.listdir(self.trash_dir)), 2)

        data.remove_dataset(identifier)

        self.assertFalse(
            os.path.isfile("{}/{}.yaml".format(self.trash_dir, identifier)))
        self.assertEqual(len(os.listdir(self.trash_dir)), 1)

    def test_should_remove_unknown_dataset(self):
        data = DatasetManager("./tests/resources/local_data", fs=self.os)
        with self.assertRaises(IOError):
            data.remove_dataset("unknown_dataset")
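# Hedged sketch (inferred from the tests above, not from the project docs): each
# dataset appears to live in its own "<identifier>.yaml" file inside the managed
# directory, carrying at least "source" and "description" fields. Whether the file
# is keyed by the identifier at the top level is a guess; a plausible
# tests/resources/one_data/one_test.yaml matching test_should_read_yaml_from_dir:
#
#     one_test:
#       source: "http://source/teste"
#       description: "my little dataset"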
if "renpy.zip" not in cwdfs.listdir("/"): puts("Downloading Ren'Py") r = requests.get( "https://www.renpy.org/dl/6.99.12.4/renpy-6.99.12.4-sdk.zip", stream=True) r.raise_for_status() with cwdfs.open("renpy.zip", 'wb') as fd: total_length = int(r.headers.get('content-length')) for chunk in progress.bar(r.iter_content(chunk_size=1024), expected_size=(total_length / 1024) + 1): fd.write(chunk) puts("Extracting Ren'Py") with ZipFS("./renpy.zip") as zipfs: fscopy.copy_dir(zipfs, "renpy-6.99.12.4-sdk", tempfs, "renpy") cwdfs.remove("renpy.zip") puts("ModTemplate setup") with indent(2): if "modtemplate.zip" not in cwdfs.listdir("/"): puts("Downloading ModTemplate") r = requests.get( "https://github.com/Monika-After-Story/DDLCModTemplate/releases/download/v1.1.0/DDLCModTemplate_1.1.0.zip", stream=True) r.raise_for_status() with cwdfs.open("modtemplate.zip", 'wb') as fd: total_length = int(r.headers.get('content-length')) for chunk in progress.bar(r.iter_content(chunk_size=1024), expected_size=(total_length / 1024) + 1): fd.write(chunk)