def __init__(self, log_level=logging.INFO):
  """ Constructor. """
  class_name = self.__class__.__name__
  self.logger = logging.getLogger(class_name)
  self.logger.setLevel(log_level)
  self.logger.info('Starting {}'.format(class_name))

  self.hosts = appscale_info.get_db_ips()
  self.retry_policy = IdempotentRetryPolicy()
  self.no_retries = FallthroughRetryPolicy()

  remaining_retries = INITIAL_CONNECT_RETRIES
  while True:
    try:
      self.cluster = Cluster(self.hosts,
                             default_retry_policy=self.retry_policy)
      self.session = self.cluster.connect(KEYSPACE)
      break
    except cassandra.cluster.NoHostAvailable as connection_error:
      remaining_retries -= 1
      if remaining_retries < 0:
        raise connection_error
      time.sleep(3)

  self.session.default_consistency_level = ConsistencyLevel.QUORUM
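# The two policies above serve opposite purposes: IdempotentRetryPolicy (an
# AppScale-defined policy, assumed to be importable from this package) retries
# operations that are safe to re-execute, while the driver's built-in
# FallthroughRetryPolicy surfaces errors immediately. A minimal sketch,
# assuming a `session` created as in the constructor above, of how a
# non-retryable statement opts out of the cluster-wide default:

from cassandra.policies import FallthroughRetryPolicy
from cassandra.query import SimpleStatement

# Schema changes should not be blindly retried on timeout, so attach the
# fall-through policy instead of inheriting the idempotent default.
no_retries = FallthroughRetryPolicy()
statement = SimpleStatement(
  'CREATE TABLE IF NOT EXISTS "example" (key blob PRIMARY KEY)',
  retry_policy=no_retries)
session.execute(statement)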
def restore_data(path, keyname, force=False):
  """ Restores the Cassandra backup.

  Args:
    path: A string containing the location on each of the DB machines to use
      for restoring data.
    keyname: A string containing the deployment's keyname.
  Raises:
    BRException if unable to find any Cassandra machines or if Cassandra
      could not be stopped on a DB machine.
  """
  logging.info("Starting new db restore.")

  db_ips = appscale_info.get_db_ips()
  if not db_ips:
    raise BRException("Unable to find any Cassandra machines.")

  machines_without_restore = []
  for db_ip in db_ips:
    exit_code = utils.ssh(db_ip, keyname, "ls {}".format(path),
                          method=subprocess.call)
    if exit_code != ExitCodes.SUCCESS:
      machines_without_restore.append(db_ip)

  if machines_without_restore and not force:
    logging.info("The following machines do not have a restore file: {}".
                 format(machines_without_restore))
    response = raw_input("Would you like to continue? [y/N] ")
    if response not in ["Y", "y"]:
      return

  for db_ip in db_ips:
    logging.info("Stopping Cassandra on {}".format(db_ip))
    summary = utils.ssh(db_ip, keyname, "monit summary",
                        method=subprocess.check_output)
    status = utils.monit_status(summary, CASSANDRA_MONIT_WATCH_NAME)
    retries = SERVICE_STOP_RETRIES
    while status != MonitStates.UNMONITORED:
      utils.ssh(db_ip, keyname,
                "monit stop {}".format(CASSANDRA_MONIT_WATCH_NAME),
                method=subprocess.call)
      time.sleep(3)
      summary = utils.ssh(db_ip, keyname, "monit summary",
                          method=subprocess.check_output)
      status = utils.monit_status(summary, CASSANDRA_MONIT_WATCH_NAME)
      retries -= 1
      if retries < 0:
        raise BRException("Unable to stop Cassandra")

  cassandra_dir = "{}/cassandra".format(APPSCALE_DATA_DIR)
  for db_ip in db_ips:
    logging.info("Restoring Cassandra data on {}".format(db_ip))
    clear_db = 'find {0} -regex ".*\.\(db\|txt\|log\)$" -exec rm {{}} \;'.\
      format(cassandra_dir)
    utils.ssh(db_ip, keyname, clear_db)

    if db_ip not in machines_without_restore:
      utils.ssh(db_ip, keyname,
                "tar xf {} -C {}".format(path, cassandra_dir))
      utils.ssh(db_ip, keyname,
                "chown -R cassandra {}".format(cassandra_dir))

    utils.ssh(db_ip, keyname,
              "monit start {}".format(CASSANDRA_MONIT_WATCH_NAME))

  logging.info("Done with db restore.")
def backup_data(path, keyname):
  """ Backup Cassandra snapshot data directories/files.

  Args:
    path: A string containing the location to store the backup on each of the
      DB machines.
    keyname: A string containing the deployment's keyname.
  Raises:
    BRException if unable to find any Cassandra machines or if DB machine has
      insufficient space.
  """
  logging.info("Starting new db backup.")

  db_ips = appscale_info.get_db_ips()
  if not db_ips:
    raise BRException('Unable to find any Cassandra machines.')

  for db_ip in db_ips:
    utils.ssh(db_ip, keyname, '{} clearsnapshot'.format(NODE_TOOL))
    utils.ssh(db_ip, keyname, '{} snapshot'.format(NODE_TOOL))

    get_snapshot_size = 'find {0} -name "snapshots" -exec du -s {{}} \;'.\
      format(APPSCALE_DATA_DIR)
    du_output = utils.ssh(db_ip, keyname, get_snapshot_size,
                          method=subprocess.check_output)
    backup_size = sum(
      int(line.split()[0]) for line in du_output.split('\n') if line)

    output_dir = '/'.join(path.split('/')[:-1]) + '/'
    df_output = utils.ssh(db_ip, keyname, 'df {}'.format(output_dir),
                          method=subprocess.check_output)
    available = int(df_output.split('\n')[1].split()[3])

    if backup_size > available * PADDING_PERCENTAGE:
      raise BRException('{} has insufficient space: {}/{}'.format(
        db_ip, available * PADDING_PERCENTAGE, backup_size))

  cassandra_dir = '{}/cassandra'.format(APPSCALE_DATA_DIR)
  for db_ip in db_ips:
    create_tar = 'find . -regex ".*/snapshots/[0-9]*/.*" -exec tar '\
      '--transform="s/snapshots\/[0-9]*\///" -cf {0} {{}} +'.format(path)
    utils.ssh(db_ip, keyname, 'cd {} && {}'.format(cassandra_dir, create_tar))

  logging.info("Done with db backup.")
def __init__(self):
  hosts = appscale_info.get_db_ips()

  remaining_retries = INITIAL_CONNECT_RETRIES
  while True:
    try:
      cluster = Cluster(hosts)
      self.session = cluster.connect(keyspace=KEYSPACE)
      break
    except cassandra.cluster.NoHostAvailable as connection_error:
      remaining_retries -= 1
      if remaining_retries < 0:
        raise connection_error
      time.sleep(3)

  self.session.default_consistency_level = ConsistencyLevel.QUORUM
def __init__(self):
  hosts = appscale_info.get_db_ips()

  remaining_retries = INITIAL_CONNECT_RETRIES
  while True:
    try:
      cluster = Cluster(hosts)
      self.session = cluster.connect(keyspace=KEYSPACE)
      break
    except cassandra.cluster.NoHostAvailable as connection_error:
      remaining_retries -= 1
      if remaining_retries < 0:
        raise connection_error
      time.sleep(3)

  self.session.default_consistency_level = ConsistencyLevel.QUORUM
  self.retry_policy = IdempotentRetryPolicy()
def __init__(self):
  hosts = appscale_info.get_db_ips()

  remaining_retries = INITIAL_CONNECT_RETRIES
  while True:
    try:
      # Cassandra 2.1 only supports up to Protocol Version 3.
      cluster = Cluster(hosts, protocol_version=3)
      self.session = cluster.connect(keyspace=KEYSPACE)
      break
    except cassandra.cluster.NoHostAvailable as connection_error:
      remaining_retries -= 1
      if remaining_retries < 0:
        raise connection_error
      time.sleep(3)

  self.session.default_consistency_level = ConsistencyLevel.QUORUM
  self.retry_policy = IdempotentRetryPolicy()
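# The connect-with-retries loop above is repeated nearly verbatim in each of
# these constructors. A minimal sketch of how it could be factored into a
# shared helper; the helper name and RETRY_DELAY constant are illustrative,
# not part of the original code:

import time

import cassandra.cluster
from cassandra.cluster import Cluster

RETRY_DELAY = 3  # Seconds between attempts, matching the loops above.

def connect_with_retries(hosts, retries, keyspace=None, **cluster_kwargs):
  """ Hypothetical helper: connect to Cassandra, retrying on NoHostAvailable.

  Returns:
    A (cluster, session) tuple once a connection succeeds.
  """
  while True:
    try:
      cluster = Cluster(hosts, **cluster_kwargs)
      return cluster, cluster.connect(keyspace)
    except cassandra.cluster.NoHostAvailable:
      retries -= 1
      if retries < 0:
        raise
      time.sleep(RETRY_DELAY)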
def backup_data(path, keyname):
  """ Backup Cassandra snapshot data directories/files.

  Args:
    path: A string containing the location to store the backup on each of the
      DB machines.
    keyname: A string containing the deployment's keyname.
  Raises:
    BRException if unable to find any Cassandra machines or if DB machine has
      insufficient space.
  """
  logging.info("Starting new db backup.")

  db_ips = appscale_info.get_db_ips()
  if not db_ips:
    raise BRException('Unable to find any Cassandra machines.')

  for db_ip in db_ips:
    utils.ssh(db_ip, keyname, '{} clearsnapshot'.format(NODE_TOOL))
    utils.ssh(db_ip, keyname, '{} snapshot'.format(NODE_TOOL))

    get_snapshot_size = 'find {0} -name "snapshots" -exec du -s {{}} \;'.\
      format(APPSCALE_DATA_DIR)
    du_output = utils.ssh(db_ip, keyname, get_snapshot_size,
                          method=subprocess.check_output)
    backup_size = sum(int(line.split()[0])
                      for line in du_output.split('\n') if line)

    output_dir = '/'.join(path.split('/')[:-1]) + '/'
    df_output = utils.ssh(db_ip, keyname, 'df {}'.format(output_dir),
                          method=subprocess.check_output)
    available = int(df_output.split('\n')[1].split()[3])

    if backup_size > available * PADDING_PERCENTAGE:
      raise BRException('{} has insufficient space: {}/{}'.format(
        db_ip, available * PADDING_PERCENTAGE, backup_size))

  cassandra_dir = '{}/cassandra'.format(APPSCALE_DATA_DIR)
  for db_ip in db_ips:
    create_tar = 'find . -regex ".*/snapshots/[0-9]*/.*" -exec tar '\
      '--transform="s/snapshots\/[0-9]*\///" -cf {0} {{}} +'.format(path)
    utils.ssh(db_ip, keyname, 'cd {} && {}'.format(cassandra_dir, create_tar))

  logging.info("Done with db backup.")
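# A hedged usage sketch for backup_data; the path and keyname below are
# placeholder values, not paths used by the original deployment:

# Writes each node's snapshot archive to /opt/appscale/backups/cassandra.tar
# on that node, after verifying the target volume has enough free space.
backup_data(path='/opt/appscale/backups/cassandra.tar', keyname='appscale')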
def prime_cassandra(replication):
  """ Create Cassandra keyspace and initial tables.

  Args:
    replication: An integer specifying the replication factor for the
      keyspace.
  Raises:
    AppScaleBadArg if replication factor is not greater than 0.
    TypeError if replication is not an integer.
  """
  if not isinstance(replication, int):
    raise TypeError('Replication must be an integer')

  if int(replication) <= 0:
    raise dbconstants.AppScaleBadArg('Replication must be greater than zero')

  hosts = appscale_info.get_db_ips()

  cluster = None
  session = None
  remaining_retries = INITIAL_CONNECT_RETRIES
  while True:
    try:
      cluster = Cluster(hosts)
      session = cluster.connect()
      break
    except cassandra.cluster.NoHostAvailable as connection_error:
      remaining_retries -= 1
      if remaining_retries < 0:
        raise connection_error
      time.sleep(3)

  session.default_consistency_level = ConsistencyLevel.QUORUM

  create_keyspace = """
    CREATE KEYSPACE IF NOT EXISTS "{keyspace}"
    WITH REPLICATION = %(replication)s
  """.format(keyspace=KEYSPACE)
  keyspace_replication = {'class': 'SimpleStrategy',
                          'replication_factor': replication}
  session.execute(create_keyspace, {'replication': keyspace_replication})
  session.set_keyspace(KEYSPACE)

  for table in dbconstants.INITIAL_TABLES:
    create_table = """
      CREATE TABLE IF NOT EXISTS "{table}" (
        {key} blob,
        {column} text,
        {value} blob,
        PRIMARY KEY ({key}, {column})
      ) WITH COMPACT STORAGE
    """.format(table=table,
               key=ThriftColumn.KEY,
               column=ThriftColumn.COLUMN_NAME,
               value=ThriftColumn.VALUE)
    statement = SimpleStatement(create_table, retry_policy=NO_RETRIES)

    logging.info('Trying to create {}'.format(table))
    try:
      session.execute(statement)
    except cassandra.OperationTimedOut:
      logging.warning(
        'Encountered an operation timeout while creating {} table. '
        'Waiting 1 minute for schema to settle.'.format(table))
      time.sleep(60)
      raise

  create_batch_tables(cluster, session)
  create_groups_table(session)
  create_transactions_table(session)
  create_pull_queue_tables(cluster, session)

  first_entity = session.execute(
    'SELECT * FROM "{}" LIMIT 1'.format(dbconstants.APP_ENTITY_TABLE))
  existing_entities = len(list(first_entity)) == 1

  define_ua_schema(session)

  metadata_insert = """
    INSERT INTO "{table}" ({key}, {column}, {value})
    VALUES (%(key)s, %(column)s, %(value)s)
  """.format(table=dbconstants.DATASTORE_METADATA_TABLE,
             key=ThriftColumn.KEY,
             column=ThriftColumn.COLUMN_NAME,
             value=ThriftColumn.VALUE)

  if not existing_entities:
    parameters = {'key': bytearray(cassandra_interface.VERSION_INFO_KEY),
                  'column': cassandra_interface.VERSION_INFO_KEY,
                  'value': bytearray(str(POST_JOURNAL_VERSION))}
    session.execute(metadata_insert, parameters)

    # Mark the newly created indexes as clean.
    parameters = {'key': bytearray(cassandra_interface.INDEX_STATE_KEY),
                  'column': cassandra_interface.INDEX_STATE_KEY,
                  'value': bytearray(str(IndexStates.CLEAN))}
    session.execute(metadata_insert, parameters)

  # Indicate that the database has been successfully primed.
  parameters = {'key': bytearray(cassandra_interface.PRIMED_KEY),
                'column': cassandra_interface.PRIMED_KEY,
                'value': bytearray('true')}
  session.execute(metadata_insert, parameters)
  logging.info('Cassandra is primed.')
def restore_data(path, keyname, force=False):
  """ Restores the Cassandra backup.

  Args:
    path: A string containing the location on each of the DB machines to use
      for restoring data.
    keyname: A string containing the deployment's keyname.
  Raises:
    BRException if unable to find any Cassandra machines or if Cassandra
      could not be stopped on a DB machine.
  """
  logging.info("Starting new db restore.")

  db_ips = appscale_info.get_db_ips()
  if not db_ips:
    raise BRException('Unable to find any Cassandra machines.')

  machines_without_restore = []
  for db_ip in db_ips:
    exit_code = utils.ssh(db_ip, keyname, 'ls {}'.format(path),
                          method=subprocess.call)
    if exit_code != ExitCodes.SUCCESS:
      machines_without_restore.append(db_ip)

  if machines_without_restore and not force:
    logging.info(
      'The following machines do not have a restore file: {}'.format(
        machines_without_restore))
    response = raw_input('Would you like to continue? [y/N] ')
    if response not in ['Y', 'y']:
      return

  for db_ip in db_ips:
    logging.info('Stopping Cassandra on {}'.format(db_ip))
    summary = utils.ssh(db_ip, keyname, 'monit summary',
                        method=subprocess.check_output)
    status = utils.monit_status(summary, CASSANDRA_MONIT_WATCH_NAME)
    retries = SERVICE_STOP_RETRIES
    while status != MonitStates.UNMONITORED:
      utils.ssh(db_ip, keyname,
                'monit stop {}'.format(CASSANDRA_MONIT_WATCH_NAME))
      time.sleep(1)
      summary = utils.ssh(db_ip, keyname, 'monit summary',
                          method=subprocess.check_output)
      status = utils.monit_status(summary, CASSANDRA_MONIT_WATCH_NAME)
      retries -= 1
      if retries < 0:
        raise BRException('Unable to stop Cassandra')

  cassandra_dir = '{}/cassandra'.format(APPSCALE_DATA_DIR)
  for db_ip in db_ips:
    logging.info('Restoring Cassandra data on {}'.format(db_ip))
    clear_db = 'find {0} -regex ".*\.\(db\|txt\|log\)$" -exec rm {{}} \;'.\
      format(cassandra_dir)
    utils.ssh(db_ip, keyname, clear_db)

    if db_ip not in machines_without_restore:
      utils.ssh(db_ip, keyname,
                'tar xf {} -C {}'.format(path, cassandra_dir))

    utils.ssh(db_ip, keyname,
              'monit start {}'.format(CASSANDRA_MONIT_WATCH_NAME))

  logging.info("Done with db restore.")
def prime_cassandra(replication):
  """ Create Cassandra keyspace and initial tables.

  Args:
    replication: An integer specifying the replication factor for the
      keyspace.
  Raises:
    AppScaleBadArg if replication factor is not greater than 0.
    TypeError if replication is not an integer.
  """
  if not isinstance(replication, int):
    raise TypeError('Replication must be an integer')

  if int(replication) <= 0:
    raise dbconstants.AppScaleBadArg('Replication must be greater than zero')

  hosts = appscale_info.get_db_ips()

  cluster = None
  session = None
  remaining_retries = INITIAL_CONNECT_RETRIES
  while True:
    try:
      cluster = Cluster(hosts)
      session = cluster.connect()
      break
    except cassandra.cluster.NoHostAvailable as connection_error:
      remaining_retries -= 1
      if remaining_retries < 0:
        raise connection_error
      time.sleep(3)

  session.default_consistency_level = ConsistencyLevel.QUORUM

  create_keyspace = """
    CREATE KEYSPACE IF NOT EXISTS "{keyspace}"
    WITH REPLICATION = %(replication)s
  """.format(keyspace=KEYSPACE)
  keyspace_replication = {'class': 'SimpleStrategy',
                          'replication_factor': replication}
  session.execute(create_keyspace, {'replication': keyspace_replication})
  session.set_keyspace(KEYSPACE)

  for table in dbconstants.INITIAL_TABLES:
    create_table = """
      CREATE TABLE IF NOT EXISTS "{table}" (
        {key} blob,
        {column} text,
        {value} blob,
        PRIMARY KEY ({key}, {column})
      ) WITH COMPACT STORAGE
    """.format(table=table,
               key=ThriftColumn.KEY,
               column=ThriftColumn.COLUMN_NAME,
               value=ThriftColumn.VALUE)
    statement = SimpleStatement(create_table, retry_policy=NO_RETRIES)

    logging.info('Trying to create {}'.format(table))
    try:
      session.execute(statement)
    except cassandra.OperationTimedOut:
      logging.warning(
        'Encountered an operation timeout while creating {} table. '
        'Waiting 1 minute for schema to settle.'.format(table))
      time.sleep(60)
      raise

  create_batch_tables(cluster, session)
  create_pull_queue_tables(cluster, session)

  first_entity = session.execute(
    'SELECT * FROM "{}" LIMIT 1'.format(dbconstants.APP_ENTITY_TABLE))
  existing_entities = len(list(first_entity)) == 1

  define_ua_schema(session)

  metadata_insert = """
    INSERT INTO "{table}" ({key}, {column}, {value})
    VALUES (%(key)s, %(column)s, %(value)s)
  """.format(table=dbconstants.DATASTORE_METADATA_TABLE,
             key=ThriftColumn.KEY,
             column=ThriftColumn.COLUMN_NAME,
             value=ThriftColumn.VALUE)

  if not existing_entities:
    parameters = {'key': bytearray(cassandra_interface.VERSION_INFO_KEY),
                  'column': cassandra_interface.VERSION_INFO_KEY,
                  'value': bytearray(str(POST_JOURNAL_VERSION))}
    session.execute(metadata_insert, parameters)

  # Indicate that the database has been successfully primed.
  parameters = {'key': bytearray(cassandra_interface.PRIMED_KEY),
                'column': cassandra_interface.PRIMED_KEY,
                'value': bytearray('true')}
  session.execute(metadata_insert, parameters)
  logging.info('Cassandra is primed.')
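# For illustration, priming a three-node deployment would look like this;
# per the validation at the top of the function, replication must be a
# positive int or TypeError / AppScaleBadArg is raised:

# Creates the keyspace with SimpleStrategy and replication factor 3, builds
# the initial tables, and records the PRIMED_KEY metadata marker.
prime_cassandra(replication=3)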
def get_kind_averages(keys):
  """ Get an average size for each kind.

  Args:
    keys: A list of dictionaries containing keys.
  Returns:
    A dictionary listing the average size of each kind.
  """
  hosts = appscale_info.get_db_ips()
  cluster = Cluster(hosts, default_retry_policy=BASIC_RETRIES)
  session = cluster.connect(KEYSPACE)

  entities_by_kind = {}
  for key_dict in keys:
    key = key_dict['key']
    if is_entity(key):
      key_parts = key.split(KEY_DELIMITER)
      kind = key_parts[2].split(':')[0]
      kind_id = KEY_DELIMITER.join([key_parts[0], key_parts[1], kind])
      if kind_id not in entities_by_kind:
        entities_by_kind[kind_id] = {'keys': [], 'size': 0, 'fetched': 0}
      entities_by_kind[kind_id]['keys'].append(key)

  for kind_id, kind in entities_by_kind.iteritems():
    shuffle(kind['keys'])

  if not entities_by_kind:
    return {}

  futures = []
  for _ in range(50):
    kind = choice(entities_by_kind.keys())
    try:
      key = entities_by_kind[kind]['keys'].pop()
    except IndexError:
      continue

    select = """
      SELECT {value} FROM "{table}"
      WHERE {key}=%(key)s AND {column}=%(column)s
    """.format(value=ThriftColumn.VALUE, table=APP_ENTITY_TABLE,
               key=ThriftColumn.KEY, column=ThriftColumn.COLUMN_NAME)
    parameters = {'key': bytearray(key), 'column': APP_ENTITY_SCHEMA[0]}
    future = session.execute_async(select, parameters)
    futures.append({'future': future, 'kind': kind})

  for future_dict in futures:
    future = future_dict['future']
    kind = future_dict['kind']
    try:
      entity = future.result()[0].value
    except IndexError:
      continue

    entities_by_kind[kind]['size'] += len(entity)
    entities_by_kind[kind]['fetched'] += 1

  kind_averages = {}
  for kind_id, kind in entities_by_kind.iteritems():
    try:
      kind_averages[kind_id] = int(kind['size'] / kind['fetched'])
    except ZeroDivisionError:
      kind_averages[kind_id] = 0

  return kind_averages
def get_kind_averages(keys):
  """ Get an average size for each kind.

  Args:
    keys: A list of dictionaries containing keys.
  Returns:
    A dictionary listing the average size of each kind.
  """
  hosts = appscale_info.get_db_ips()
  retry_policy = IdempotentRetryPolicy()
  cluster = Cluster(hosts, default_retry_policy=retry_policy)
  session = cluster.connect(KEYSPACE)

  entities_by_kind = {}
  for key_dict in keys:
    key = key_dict['key']
    if is_entity(key):
      key_parts = key.split(KEY_DELIMITER)
      kind = key_parts[2].split(':')[0]
      kind_id = KEY_DELIMITER.join([key_parts[0], key_parts[1], kind])
      if kind_id not in entities_by_kind:
        entities_by_kind[kind_id] = {'keys': [], 'size': 0, 'fetched': 0}
      entities_by_kind[kind_id]['keys'].append(key)

  for kind_id, kind in entities_by_kind.iteritems():
    shuffle(kind['keys'])

  if not entities_by_kind:
    return {}

  futures = []
  for _ in range(50):
    kind = choice(entities_by_kind.keys())
    try:
      key = entities_by_kind[kind]['keys'].pop()
    except IndexError:
      continue

    select = """
      SELECT {value} FROM "{table}"
      WHERE {key}=%(key)s AND {column}=%(column)s
    """.format(value=ThriftColumn.VALUE, table=APP_ENTITY_TABLE,
               key=ThriftColumn.KEY, column=ThriftColumn.COLUMN_NAME)
    parameters = {'key': bytearray(key), 'column': APP_ENTITY_SCHEMA[0]}
    future = session.execute_async(select, parameters)
    futures.append({'future': future, 'kind': kind})

  for future_dict in futures:
    future = future_dict['future']
    kind = future_dict['kind']
    try:
      entity = future.result()[0].value
    except IndexError:
      continue

    entities_by_kind[kind]['size'] += len(entity)
    entities_by_kind[kind]['fetched'] += 1

  kind_averages = {}
  for kind_id, kind in entities_by_kind.iteritems():
    try:
      kind_averages[kind_id] = int(kind['size'] / kind['fetched'])
    except ZeroDivisionError:
      kind_averages[kind_id] = 0

  return kind_averages
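# A sketch of calling get_kind_averages. The key layout (app / namespace /
# Kind:id joined by KEY_DELIMITER) is inferred from the split logic above,
# and the sample values are hypothetical; is_entity() must also accept them
# for the keys to be counted:

sample_keys = [
  {'key': KEY_DELIMITER.join(['guestbook', '', 'Greeting:0001'])},
  {'key': KEY_DELIMITER.join(['guestbook', '', 'Greeting:0002'])},
]
# Maps each kind identifier to the average encoded entity size in bytes.
averages = get_kind_averages(sample_keys)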