def destroy_schema(host):
    """Drop every keyspace returned by ``get_keyspaces(host)``.

    For each keyspace a progress line is printed and a ``DROP KEYSPACE``
    statement is sent to *host* through ``cassandra_query``.
    """
    for keyspace in get_keyspaces(host):
        print('Dropping keyspace: %s' % keyspace)
        drop_statement = 'DROP KEYSPACE %s;' % keyspace
        cassandra_query(host, drop_statement)
def check_cassandra(host):
    """Return True when system keyspaces can be retrieved from *host*.

    Raises:
        Exception: if ``get_keyspaces(host, system=True)`` returns an empty
            collection.
    """
    # TODO better way to check if host option is valid?
    # An empty result means either the host input is wrong or Cassandra
    # itself has a problem.
    system_keyspaces = get_keyspaces(host, system=True)
    if len(system_keyspaces) > 0:
        return True
    raise Exception('Cannot find system keyspaces, invalid host')
def destroy_schema(host, flag=None):
    """Drop all keyspaces on *host* and delete their data directories.

    Args:
        host: host passed to ``get_keyspaces`` and ``cassandra_query``.
        flag: ``None``/empty to ask for confirmation interactively, or
            ``'-y'`` to proceed without asking.

    Returns:
        True when there was nothing to drop or everything was dropped,
        False when the user did not confirm.

    Raises:
        Exception: if *flag* is truthy but not ``'-y'``.
    """
    keyspaces = get_keyspaces(host)
    if len(keyspaces) == 0:
        # Nothing to destroy counts as success.
        return True

    print('Removing keyspaces:')
    for name in keyspaces:
        print('\t' + name)

    if not flag:
        # No flag given: ask the user whether the listed keyspaces may go.
        answer = raw_input('Destroy keyspaces? [y/n]')
        confirmed = answer in ('y', 'Y')
    elif flag == '-y':
        confirmed = True
    else:
        # should never happen
        raise Exception('Invalid flag parameter')

    if not confirmed:
        return False

    # Drop the keyspaces through CQL first ...
    for name in keyspaces:
        print('Dropping keyspace: %s' % name)
        cassandra_query(host, 'DROP KEYSPACE %s;' % name)

    # ... then remove the matching directories left in the data directory.
    data_dir = get_data_dir()
    entries = os.listdir(data_dir)
    print('Removing old keyspace directories')
    for entry in entries:
        if entry not in keyspaces:
            continue
        print('Removing keyspace directory: %s/%s' % (data_dir, entry))
        shutil.rmtree(data_dir + '/' + entry)
    return True
def data_cleaner(host, backups=False):
    """Remove inactive keyspace and table directories, then clear snapshots.

    Entries in the Cassandra data directory that are not a known keyspace
    are deleted.  For every non-system keyspace, sub-directories that do not
    match a table directory in ``get_dir_structure`` are deleted as well.
    Finally ``nodetool clearsnapshot`` is run.

    Args:
        host: host to query; validated with ``check_host``.
        backups: when true, also run ``clean_directory`` on the ``backups``
            folder of every active table directory.

    Raises:
        Exception: if ``check_host(host)`` does not return 0.
    """

    def _remove_dir(path):
        # shutil.rmtree raises on regular files and on symlinks, which would
        # abort the cleanup half-way through; leave such entries untouched.
        if os.path.islink(path) or not os.path.isdir(path):
            print('\tSkipping (not a directory): ' + path)
            return
        print('\tDeleting: ' + path)
        shutil.rmtree(path)

    if check_host(host) != 0:
        raise Exception('Invalid host parameter')

    # system=True: presumably includes the system keyspaces so that their
    # directories are not treated as stale below -- verify in get_keyspaces.
    keyspaces = get_keyspaces(host, system=True)
    structure = get_dir_structure(host, keyspaces)
    cass_data_dir = get_data_dir()

    print('Deleting old keyspaces . . .')
    for entry in os.listdir(cass_data_dir):
        if entry not in keyspaces:
            _remove_dir(cass_data_dir + '/' + entry)

    print('\nDeleting old tables . . .')
    for keyspace in keyspaces:
        if keyspace in _SYSTEM_KEYSPACES:
            continue
        print('\nProcessing keyspace: %s' % keyspace)

        keyspace_dir = cass_data_dir + '/' + keyspace
        if not os.path.isdir(keyspace_dir):
            # os.listdir would raise here; a keyspace may have no directory
            # on disk (e.g. no tables yet -- TODO confirm).
            print('\tNo data directory found: ' + keyspace_dir)
            continue

        data_dirs = set(os.listdir(keyspace_dir))
        # Directory names of the tables that are still part of the schema.
        table_dirs = set(structure[keyspace].values())

        print('Removing inactive directories . . .')
        for d in data_dirs - table_dirs:
            _remove_dir(keyspace_dir + '/' + d)

        if backups:
            print('Removing old backup db files')
            for d in table_dirs:
                clean_directory(keyspace_dir + '/' + d + '/backups')

    print('\nClearing old snapshots . . .')
    subprocess.call(['nodetool', 'clearsnapshot'])
def data_cleaner(host='localhost', backups=False):
    """Remove inactive keyspace and table directories from the data directory.

    Entries in the Cassandra data directory that are not a known keyspace
    are deleted.  For every non-system keyspace, sub-directories that do not
    match a table directory in ``get_dir_structure`` are deleted as well.

    Args:
        host: host to query for keyspaces and directory structure.
        backups: when true, also run ``clean_directory`` on the ``backups``
            folder of every active table directory.

    Raises:
        Exception: if no keyspaces (not even system ones) are returned for
            *host*.
    """

    def _remove_dir(path):
        # shutil.rmtree raises on regular files and on symlinks, which would
        # abort the cleanup half-way through; leave such entries untouched.
        if os.path.islink(path) or not os.path.isdir(path):
            print('\tSkipping (not a directory): ' + path)
            return
        print('\tDeleting: ' + path)
        shutil.rmtree(path)

    keyspaces = get_keyspaces(host, system=True)
    if len(keyspaces) == 0:
        # there should always be system keyspaces
        raise Exception('Invalid host parameter')

    structure = get_dir_structure(host, keyspaces)
    cass_data_dir = get_data_dir()

    print('Deleting old keyspaces . . .')
    for entry in os.listdir(cass_data_dir):
        if entry not in keyspaces:
            _remove_dir(cass_data_dir + '/' + entry)

    print('\nDeleting old tables . . .')
    for keyspace in keyspaces:
        if keyspace in _SYSTEM_KEYSPACES:
            continue
        print('\nProcessing keyspace: %s' % keyspace)

        keyspace_dir = cass_data_dir + '/' + keyspace
        if not os.path.isdir(keyspace_dir):
            # os.listdir would raise here; a keyspace may have no directory
            # on disk (e.g. no tables yet -- TODO confirm).
            print('\tNo data directory found: ' + keyspace_dir)
            continue

        data_dirs = set(os.listdir(keyspace_dir))
        # Directory names of the tables that are still part of the schema.
        table_dirs = set(structure[keyspace].values())

        print('Removing inactive directories . . .')
        for d in data_dirs - table_dirs:
            _remove_dir(keyspace_dir + '/' + d)

        if backups:
            print('Removing old backup db files')
            for d in table_dirs:
                clean_directory(keyspace_dir + '/' + d + '/backups')
def save_schema(keyspace_arg=None):
    """Write the schema files and ring information under ``.snapshots``.

    The full schema and one schema per keyspace are written with
    ``write_schema`` into ``<script dir>/.snapshots/schemas``, that folder is
    zipped, and ``write_ring_info`` is called for ``<script dir>/.snapshots``.

    Args:
        keyspace_arg: optional iterable of keyspace names; each must exist on
            the node, otherwise the process exits with status 1.
    """
    host = get_rpc_address()
    save_path = sys.path[0] + '/.snapshots/schemas'
    keyspaces = get_keyspaces(host)

    if keyspace_arg:
        for ks in keyspace_arg:
            if ks not in keyspaces:
                print('ERROR: Invalid keyspace argument')
                # sys.exit instead of the site-provided exit(), which is only
                # meant for the interactive shell and may be missing.
                sys.exit(1)
    # NOTE(review): keyspace_arg is only validated; schemas are still written
    # for every keyspace below -- confirm whether filtering was intended.

    print('Saving schema . . .')
    print_save_path = write_schema(host, save_path)
    print('Saved schema as %s' % print_save_path)
    for ks in keyspaces:
        print_save_path = write_schema(host, save_path, ks)
        print('Saved keyspace schema as %s' % print_save_path)

    print('Compressing schema file')
    shutil.make_archive(save_path, 'zip', save_path)

    print('Saving ring information . . .')
    write_ring_info(sys.path[0] + '/.snapshots')
def restore(hosts, load_path, keyspace_arg=None, table_arg=None, y_flag=None):
    """Replace the current schema and data with a snapshot from *load_path*.

    In order: check that ``nodetool status`` succeeds, validate the keyspace
    and table arguments against the directories under *load_path*, drop the
    existing keyspaces with ``destroy_schema``, clean the data directory with
    ``data_cleaner``, recreate each schema with ``restore_schema`` and load
    every table directory with ``/bin/sstableloader``.

    Args:
        hosts: sequence of hosts; ``hosts[0]`` is used for all schema calls
            and the whole sequence is handed to ``sstableloader -d``.
        load_path: snapshot directory holding one sub-directory per keyspace.
        keyspace_arg: optional list of keyspaces to restore; defaults to
            every keyspace directory found in *load_path*.
        table_arg: optional tables to restore; only allowed together with
            exactly one keyspace.
        y_flag: forwarded to ``destroy_schema`` as its ``flag`` argument.

    Raises:
        Exception: if nodetool fails, an argument does not match the
            snapshot, or a snapshot table is missing from the new schema.
        SystemExit: with status 1 when ``destroy_schema`` returns a false
            value.
    """
    print('Checking Cassandra status . . .')
    try:
        subprocess.check_output(['nodetool', 'status'])
    except (OSError, subprocess.CalledProcessError):
        # OSError: nodetool could not be executed; CalledProcessError: it
        # exited non-zero.  Anything else (e.g. Ctrl-C) propagates unchanged.
        raise Exception('Cassandra has not yet started')

    # Keyspaces inside the snapshot directory.  Built as a real list because
    # it is searched repeatedly and indexed below.
    available_keyspaces = [name for name in os.listdir(load_path)
                           if os.path.isdir(load_path + '/' + name)]

    print('Checking keyspace arguments')
    if keyspace_arg:
        for keyspace in keyspace_arg:
            if keyspace not in available_keyspaces:
                raise Exception('Keyspace "%s" not in snapshot folder'
                                % keyspace)
        load_keyspaces = keyspace_arg
    else:
        load_keyspaces = available_keyspaces

    print('Checking table arguments . . .')
    if table_arg:
        if not keyspace_arg or len(keyspace_arg) != 1:
            raise Exception(
                'Only one keyspace can be specified with table arg')
        # List the keyspace folder once instead of once per table.
        snapshot_tables = os.listdir(load_path + '/' + load_keyspaces[0])
        for tb in table_arg:
            if tb not in snapshot_tables:
                raise Exception('Table "%s" not found in keyspace "%s"'
                                % (tb, load_keyspaces[0]))
        load_tables = set(table_arg)
    else:
        print('No table arguments.')

    print('Valid arguments.\n')

    print('Destroying existing database')
    if not destroy_schema(hosts[0], y_flag):
        print('Unable to destroy previous data, exiting script')
        # The restore did not happen, so report a non-zero status.
        sys.exit(1)

    # delete old keyspace directories
    data_cleaner(hosts[0])

    for keyspace in load_keyspaces:
        print('Creating schema for %s' % keyspace)
        restore_schema(hosts[0], load_path, keyspace)

    # keyspaces just created by schema
    existing_keyspaces = get_keyspaces(hosts[0])
    # basic schema in a json format
    structure = get_dir_structure(hosts[0], existing_keyspaces)

    for keyspace in load_keyspaces:
        print('Loading keyspace "%s"' % keyspace)
        keyspace_dir = load_path + '/' + keyspace

        if not table_arg:
            # No explicit tables: load every table directory in the snapshot.
            load_tables = [name for name in os.listdir(keyspace_dir)
                           if os.path.isdir(keyspace_dir + '/' + name)]

        existing_tables = structure[keyspace].keys()
        for table in load_tables:
            if table not in existing_tables:
                raise Exception('Table not in schema, error with snapshot')

            load_table_dir = keyspace_dir + '/' + table
            print('\n\nLoading table: %s' % table)
            # sstableloader has been more stable than nodetool refresh
            # NOTE(review): the exit status of sstableloader is not checked.
            subprocess.call(
                ['/bin/sstableloader', '-d', ', '.join(hosts), load_table_dir])

    print('Restoration complete')
def snapshot(keyspace_arg=None, table_arg=None):
    """Take a local snapshot and store it under ``<script dir>/.snapshots/``.

    The snapshot is titled with the node's rpc address.  Existing content of
    the ``.snapshots`` folder and previous nodetool snapshots are removed
    first; the new snapshot directories are copied into
    ``.snapshots/<title>/<keyspace>/<table>/`` and the result is zipped.

    Args:
        keyspace_arg: optional iterable of keyspaces to snapshot; defaults
            to every keyspace returned by ``get_keyspaces``.
        table_arg: optional iterable of tables; only allowed together with
            exactly one keyspace.

    Any validation failure prints an ``ERROR:`` line and exits with status 1.
    """
    # nodetool can only run localhost and cqlsh can only run on host argument
    host = get_rpc_address()
    title = host  # all local snapshots are named by ip address / rpc_address
    save_root = sys.path[0] + '/.snapshots/'

    # sys.exit is used throughout instead of the site-provided exit(), which
    # is only meant for the interactive shell and may be missing.
    if check_host(host) != 0:
        print('ERROR: Invalid host, check rpc_address in this node\'s yaml file')
        sys.exit(1)

    keyspaces = get_keyspaces(host)  # set; retrieves through cqlsh
    if len(keyspaces) == 0:  # edge case
        print('ERROR: No keyspaces found')
        sys.exit(1)

    print('Checking keyspace arguments . . .')
    if keyspace_arg:
        for ks in keyspace_arg:
            if ks not in keyspaces:
                print('ERROR: Keyspaces "%s" not found.' % ks)
                sys.exit(1)
        # All requested keyspaces exist: restrict the work to them.
        keyspaces = set(keyspace_arg)
    else:
        print('No keyspace arguments.')

    structure = get_dir_structure(host, keyspaces)  # basic schema in json format

    print('Checking table arguments . . .')
    if table_arg:
        if not keyspace_arg or len(keyspace_arg) != 1:
            print('ERROR: Only one keyspace can be specified with table arg')
            sys.exit(1)
        ks = next(iter(keyspaces))  # retrieve only element in set
        for tb in table_arg:
            if tb not in structure[ks]:
                print('ERROR: Table "%s" not found in keyspace "%s"' % (tb, ks))
                sys.exit(1)
        tables = set(table_arg)
    else:
        print('No table arguments.')

    print('Valid arguments.\n')

    print('Clearing previous cassandra data snapshots . . .')
    subprocess.call(['nodetool', 'clearsnapshot'])

    if os.path.isdir(save_root):  # remove old snapshots from .snapshot
        for f in os.listdir(save_root):
            if os.path.isdir(save_root + f):
                shutil.rmtree(save_root + f)
            else:
                os.remove(save_root + f)

    save_path = save_root + title
    if os.path.exists(save_path):
        print('ERROR: Snapshot save path conflict')
        sys.exit(1)

    print('Saving snapshot into %s . . .' % save_path)
    print('Producing snapshots . . .')
    if keyspace_arg:
        if table_arg:
            ks = next(iter(keyspaces))
            for table in tables:
                run_snapshot(title, ks, table)
        else:
            run_snapshot(title, ' '.join(keyspaces))
    else:
        run_snapshot(title)

    # Copy each table's snapshot folder out of the Cassandra data directory.
    cassandra_data_dir = get_data_dir()
    for ks in keyspaces:
        if not table_arg:
            tables = structure[ks]
        for tb in tables:
            save_table_path = '%(save_path)s/%(keyspace)s/%(table)s/' \
                % dict(save_path=save_path, keyspace=ks, table=tb)
            load_dir = '%(data_dir)s/%(keyspace)s/%(table_dir)s/snapshots/%(ss_title)s' \
                % dict(data_dir=cassandra_data_dir,
                       keyspace=ks,
                       table_dir=structure[ks][tb],
                       ss_title=title)
            print('Storing %s in %s' % (tb, save_table_path))
            shutil.copytree(load_dir, save_table_path)

    print('Compressing snapshot file')
    shutil.make_archive(save_path, 'zip', save_path)

    print('\nProcess complete. Snapshot stored in %s\n' % save_path)
def restore(hosts, load_path, keyspace_arg=None, table_arg=None, y_flag=None):
    """Replace the current schema and data with a snapshot from *load_path*.

    In order: check that ``nodetool status`` succeeds, validate the keyspace
    and table arguments against the directories under *load_path*, drop the
    existing keyspaces with ``destroy_schema``, clean the data directory with
    ``data_cleaner``, recreate each schema with ``restore_schema`` and load
    every table directory with ``/bin/sstableloader``.

    Args:
        hosts: sequence of hosts; ``hosts[0]`` is used for all schema calls
            and the whole sequence is handed to ``sstableloader -d``.
        load_path: snapshot directory holding one sub-directory per keyspace.
        keyspace_arg: optional list of keyspaces to restore; defaults to
            every keyspace directory found in *load_path*.
        table_arg: optional tables to restore; only allowed together with
            exactly one keyspace.
        y_flag: forwarded to ``destroy_schema`` as its ``flag`` argument.

    Raises:
        Exception: if nodetool fails, an argument does not match the
            snapshot, or a snapshot table is missing from the new schema.
        SystemExit: with status 1 when ``destroy_schema`` returns a false
            value.
    """
    print('Checking Cassandra status . . .')
    try:
        subprocess.check_output(['nodetool', 'status'])
    except (OSError, subprocess.CalledProcessError):
        # OSError: nodetool could not be executed; CalledProcessError: it
        # exited non-zero.  Anything else (e.g. Ctrl-C) propagates unchanged.
        raise Exception('Cassandra has not yet started')

    # Keyspaces inside the snapshot directory.  Built as a real list because
    # it is searched repeatedly and indexed below.
    available_keyspaces = [name for name in os.listdir(load_path)
                           if os.path.isdir(load_path + '/' + name)]

    print('Checking keyspace arguments')
    if keyspace_arg:
        for keyspace in keyspace_arg:
            if keyspace not in available_keyspaces:
                raise Exception('Keyspace "%s" not in snapshot folder'
                                % keyspace)
        load_keyspaces = keyspace_arg
    else:
        load_keyspaces = available_keyspaces

    print('Checking table arguments . . .')
    if table_arg:
        if not keyspace_arg or len(keyspace_arg) != 1:
            raise Exception('Only one keyspace can be specified with table arg')
        # List the keyspace folder once instead of once per table.
        snapshot_tables = os.listdir(load_path + '/' + load_keyspaces[0])
        for tb in table_arg:
            if tb not in snapshot_tables:
                raise Exception('Table "%s" not found in keyspace "%s"'
                                % (tb, load_keyspaces[0]))
        load_tables = set(table_arg)
    else:
        print('No table arguments.')

    print('Valid arguments.\n')

    print('Destroying existing database')
    if not destroy_schema(hosts[0], y_flag):
        print('Unable to destroy previous data, exiting script')
        # The restore did not happen, so report a non-zero status.
        sys.exit(1)

    # delete old keyspace directories
    data_cleaner(hosts[0])

    for keyspace in load_keyspaces:
        print('Creating schema for %s' % keyspace)
        restore_schema(hosts[0], load_path, keyspace)

    # keyspaces just created by schema
    existing_keyspaces = get_keyspaces(hosts[0])
    # basic schema in a json format
    structure = get_dir_structure(hosts[0], existing_keyspaces)

    for keyspace in load_keyspaces:
        print('Loading keyspace "%s"' % keyspace)
        keyspace_dir = load_path + '/' + keyspace

        if not table_arg:
            # No explicit tables: load every table directory in the snapshot.
            load_tables = [name for name in os.listdir(keyspace_dir)
                           if os.path.isdir(keyspace_dir + '/' + name)]

        existing_tables = structure[keyspace].keys()
        for table in load_tables:
            if table not in existing_tables:
                raise Exception('Table not in schema, error with snapshot')

            load_table_dir = keyspace_dir + '/' + table
            print('\n\nLoading table: %s' % table)
            # sstableloader has been more stable than nodetool refresh
            # NOTE(review): the exit status of sstableloader is not checked.
            subprocess.call(['/bin/sstableloader', '-d', ', '.join(hosts),
                             load_table_dir])

    print('Restoration complete')
def snapshot(host, save_path, title_arg=None, keyspace_arg=None, table_arg=None):
    """Take a snapshot and copy it, with its schema files, below *save_path*.

    The snapshot is stored in ``save_path + title``, where the title is
    *title_arg* or a ``YYYY-mm-dd_HH-MM-SS`` timestamp.  Previous nodetool
    snapshots are cleared, new ones are produced with ``run_snapshot`` and
    each table's snapshot folder is copied to
    ``<save dir>/<keyspace>/<table>/``.  Schema files are then written with
    ``write_schema``.

    Args:
        host: host used for the keyspace, structure and schema queries.
        save_path: prefix the title is appended to (plain string
            concatenation, so it should end with a path separator).
        title_arg: optional snapshot title.
        keyspace_arg: optional iterable of keyspaces to snapshot; defaults
            to every keyspace returned by ``get_keyspaces``.
        table_arg: optional iterable of tables; only allowed together with
            exactly one keyspace.

    Raises:
        Exception: if nodetool fails, no keyspaces are found, the target
            directory already exists, or an argument is not in the database.
    """
    # nodetool can only run localhost and cqlsh can only run on host argument
    print('Checking Cassandra status . . .')
    try:
        subprocess.check_output(['nodetool', 'status'])
    except (OSError, subprocess.CalledProcessError):
        # OSError: nodetool could not be executed; CalledProcessError: it
        # exited non-zero.  Anything else (e.g. Ctrl-C) propagates unchanged.
        raise Exception('Cassandra has not yet started')

    # TODO hacky
    # get_keyspaces() calls cassandra_query which checks if the host works
    keyspaces = get_keyspaces(host)  # set of keyspaces
    if len(keyspaces) == 0:  # edge case
        raise Exception('No keyspaces to snapshot. If Connection Error, ' +
                        'host option is invalid.')

    if not title_arg:
        title = '{:%Y-%m-%d_%H-%M-%S}'.format(datetime.datetime.now())
    else:
        title = title_arg

    save_path = save_path + title
    if os.path.exists(save_path):
        raise Exception('Error: Snapshot directory already created')

    print('Checking keyspace arguments . . .')
    if keyspace_arg:
        # checks if keyspace argument exists in database
        for ks in keyspace_arg:
            if ks not in keyspaces:
                raise Exception('Keyspace "%s" not found.' % ks)
        # All requested keyspaces exist: restrict the work to them.
        keyspaces = set(keyspace_arg)
    else:
        print('No keyspace arguments.')

    structure = get_dir_structure(host, keyspaces)  # basic schema in json format

    print('Checking table arguments . . .')
    if table_arg:
        if not keyspace_arg or len(keyspace_arg) != 1:
            raise Exception('Only one keyspace can be specified with table arg')
        ks = next(iter(keyspaces))  # retrieve only element in set
        for tb in table_arg:
            if tb not in structure[ks]:
                raise Exception('Table "%s" not found in keyspace "%s"'
                                % (tb, ks))
        tables = set(table_arg)
    else:
        print('No table arguments.')

    print('Valid arguments.\n')

    print('Clearing previous cassandra data snapshots . . .')
    subprocess.call(['nodetool', 'clearsnapshot'])

    print('Saving snapshot into %s . . .' % save_path)
    print('Producing snapshots . . .')
    if keyspace_arg:
        if table_arg:
            ks = next(iter(keyspaces))
            for table in tables:
                run_snapshot(title, ks, table)
        else:
            run_snapshot(title, ' '.join(keyspaces))
    else:
        run_snapshot(title)

    # Copy each table's snapshot folder out of the Cassandra data directory.
    cassandra_data_dir = get_data_dir()
    for ks in keyspaces:
        if not table_arg:
            tables = structure[ks]
        for tb in tables:
            save_table_path = '%(save_path)s/%(keyspace)s/%(table)s/' \
                % dict(save_path=save_path, keyspace=ks, table=tb)
            load_dir = '%(data_dir)s/%(keyspace)s/%(table_dir)s/snapshots/%(ss_title)s' \
                % dict(data_dir=cassandra_data_dir,
                       keyspace=ks,
                       table_dir=structure[ks][tb],
                       ss_title=title)
            print('Storing %s in %s' % (tb, save_table_path))
            shutil.copytree(load_dir, save_table_path)

    print('Saving schema . . .')
    print_save_path = write_schema(host, save_path)
    print('Saved schema as %s' % print_save_path)
    for ks in keyspaces:
        print_save_path = write_schema(host, save_path, ks)
        print('Saved keyspace schema as %s' % print_save_path)

    print('\nProcess complete. Snapshot stored in %s\n' % save_path)
def snapshot(host, save_path, title_arg=None, keyspace_arg=None, table_arg=None):
    """Take a snapshot and copy it, with its schema files, below *save_path*.

    The snapshot is stored in ``save_path + title``, where the title is
    *title_arg* or a ``YYYY-mm-dd_HH-MM-SS`` timestamp.  Previous nodetool
    snapshots are cleared, new ones are produced with ``run_snapshot`` and
    each table's snapshot folder is copied to
    ``<save dir>/<keyspace>/<table>/``.  Schema files are then written with
    ``write_schema``.

    Args:
        host: host used for the keyspace, structure and schema queries.
        save_path: prefix the title is appended to (plain string
            concatenation, so it should end with a path separator).
        title_arg: optional snapshot title.
        keyspace_arg: optional iterable of keyspaces to snapshot; defaults
            to every keyspace returned by ``get_keyspaces``.
        table_arg: optional iterable of tables; only allowed together with
            exactly one keyspace.

    Raises:
        Exception: if nodetool fails, no keyspaces are found, the target
            directory already exists, or an argument is not in the database.
    """
    # nodetool can only run localhost and cqlsh can only run on host argument
    print('Checking Cassandra status . . .')
    try:
        subprocess.check_output(['nodetool', 'status'])
    except (OSError, subprocess.CalledProcessError):
        # OSError: nodetool could not be executed; CalledProcessError: it
        # exited non-zero.  Anything else (e.g. Ctrl-C) propagates unchanged.
        raise Exception('Cassandra has not yet started')

    # TODO hacky
    # get_keyspaces() calls cassandra_query which checks if the host works
    keyspaces = get_keyspaces(host)  # set of keyspaces
    if len(keyspaces) == 0:  # edge case
        raise Exception('No keyspaces to snapshot. If Connection Error, ' +
                        'host option is invalid.')

    if not title_arg:
        title = '{:%Y-%m-%d_%H-%M-%S}'.format(datetime.datetime.now())
    else:
        title = title_arg

    save_path = save_path + title
    if os.path.exists(save_path):
        raise Exception('Error: Snapshot directory already created')

    print('Checking keyspace arguments . . .')
    if keyspace_arg:
        # checks if keyspace argument exists in database
        for ks in keyspace_arg:
            if ks not in keyspaces:
                raise Exception('Keyspace "%s" not found.' % ks)
        # All requested keyspaces exist: restrict the work to them.
        keyspaces = set(keyspace_arg)
    else:
        print('No keyspace arguments.')

    structure = get_dir_structure(host, keyspaces)  # basic schema in json format

    print('Checking table arguments . . .')
    if table_arg:
        if not keyspace_arg or len(keyspace_arg) != 1:
            raise Exception(
                'Only one keyspace can be specified with table arg')
        ks = next(iter(keyspaces))  # retrieve only element in set
        for tb in table_arg:
            if tb not in structure[ks]:
                raise Exception('Table "%s" not found in keyspace "%s"'
                                % (tb, ks))
        tables = set(table_arg)
    else:
        print('No table arguments.')

    print('Valid arguments.\n')

    print('Clearing previous cassandra data snapshots . . .')
    subprocess.call(['nodetool', 'clearsnapshot'])

    print('Saving snapshot into %s . . .' % save_path)
    print('Producing snapshots . . .')
    if keyspace_arg:
        if table_arg:
            ks = next(iter(keyspaces))
            for table in tables:
                run_snapshot(title, ks, table)
        else:
            run_snapshot(title, ' '.join(keyspaces))
    else:
        run_snapshot(title)

    # Copy each table's snapshot folder out of the Cassandra data directory.
    cassandra_data_dir = get_data_dir()
    for ks in keyspaces:
        if not table_arg:
            tables = structure[ks]
        for tb in tables:
            save_table_path = '%(save_path)s/%(keyspace)s/%(table)s/' \
                % dict(save_path=save_path, keyspace=ks, table=tb)
            load_dir = '%(data_dir)s/%(keyspace)s/%(table_dir)s/snapshots/%(ss_title)s' \
                % dict(data_dir=cassandra_data_dir,
                       keyspace=ks,
                       table_dir=structure[ks][tb],
                       ss_title=title)
            print('Storing %s in %s' % (tb, save_table_path))
            shutil.copytree(load_dir, save_table_path)

    print('Saving schema . . .')
    print_save_path = write_schema(host, save_path)
    print('Saved schema as %s' % print_save_path)
    for ks in keyspaces:
        print_save_path = write_schema(host, save_path, ks)
        print('Saved keyspace schema as %s' % print_save_path)

    print('\nProcess complete. Snapshot stored in %s\n' % save_path)