def search_data_hierarchy_fr(list_geonameid):
    """Walk up the geonames hierarchy for the given geonameids.

    Args:
        list_geonameid (list): geonameids whose ancestors should be looked up.

    Returns:
        list: ancestor geo_name rows, ordered from the smallest zone to the largest.
    """
    bool_h = True
    list_geonameid = list_to_str(list_geonameid)
    hierarchy = []
    while bool_h is True:
        # look up the parent in the hierarchy table
        query = 'SELECT parentId FROM cassandra.hierarchy where childId IN '
        row_parent_id = connection.execute(query + '(' + list_geonameid + ') ALLOW FILTERING')
        row_parent_id = row_parent_id.one()
        if row_parent_id is not None:
            row_parent_id = str(row_parent_id['parentid'])
            list_geonameid = row_parent_id
            hierarchy.append(row_parent_id)
        else:
            bool_h = False
    liste_hierarchy = []
    # iterate one by one so the hierarchy is recorded from the smallest zone to the largest
    for hier in hierarchy:
        rows = connection.execute(
            'SELECT * FROM cassandra.geo_name WHERE geonameid=' + hier)
        liste_hierarchy.append(rows.one())
    return liste_hierarchy
def trim(self, key, length, batch_interface=None):
    '''
    trim using Cassandra's tombstones black magic:
    retrieve the WRITETIME of the last item we want to keep,
    then delete everything written after that

    this is still pretty inefficient since it needs to retrieve
    length amount of items

    WARNING: since activities created using Batch share the same timestamp,
    trim can trash up to (batch_size - 1) more activities than requested
    '''
    query = "SELECT WRITETIME(%s) as wt FROM %s.%s WHERE feed_id='%s' ORDER BY activity_id DESC LIMIT %s;"
    trim_col = [c for c in self.model._columns.keys()
                if c not in self.model._primary_keys.keys()][0]
    parameters = (
        trim_col, self.model._get_keyspace(), self.column_family_name, key, length + 1)
    results = execute(query % parameters)
    # compatibility with both cassandra driver 2.7 and 3.0
    results_length = len(results.current_rows) if hasattr(results, 'current_rows') else len(results)
    if results_length < length:
        return
    trim_ts = (results[-1]['wt'] + results[-2]['wt']) // 2
    delete_query = "DELETE FROM %s.%s USING TIMESTAMP %s WHERE feed_id='%s';"
    delete_params = (
        self.model._get_keyspace(), self.column_family_name, trim_ts, key)
    execute(delete_query % delete_params)
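# The trim above leans on CQL's "DELETE ... USING TIMESTAMP", which tombstones
# only cells whose write time is at or below the given timestamp. A minimal
# standalone sketch of the same idea, assuming a connected driver `session`
# and a hypothetical ks.feed table (all names here are illustrative, not taken
# from the snippet above); it shares the batch-timestamp caveat noted there.
def trim_feed_sketch(session, feed_id, keep):
    rows = list(session.execute(
        "SELECT WRITETIME(activity) AS wt FROM ks.feed "
        "WHERE feed_id=%s ORDER BY activity_id DESC LIMIT %s;",
        (feed_id, keep + 1)))
    if len(rows) <= keep:
        return  # nothing beyond the keep window, nothing to trim
    cutoff = rows[-1].wt  # write time of the newest item we want to drop
    # every cell written at or before `cutoff` is shadowed; newer writes survive
    session.execute(
        "DELETE FROM ks.feed USING TIMESTAMP %s WHERE feed_id=%s;",
        (cutoff, feed_id))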
def test_extra_field(self):
    drop_table(self.TestModel)
    sync_table(self.TestModel)
    self.TestModel.create()
    execute("ALTER TABLE {0} add blah int".format(
        self.TestModel.column_family_name(include_keyspace=True)))
    self.TestModel.objects().all()
def create_keyspace():
    logger.info("create_keyspace(): creating keyspace %s",
                settings.CASSANDRA_CONNECTION['KEYSPACE'])
    connection.execute("CREATE KEYSPACE IF NOT EXISTS {} WITH REPLICATION = {}".format(
        settings.CASSANDRA_CONNECTION['KEYSPACE'],
        settings.CASSANDRA_CONNECTION['KEYSPACE_REPLICATION']
    ))
def test_insert_statement_execute(self):
    """
    Test to verify the execution of BaseCQLStatements using connection.execute

    @since 3.10
    @jira_ticket PYTHON-505
    @expected_result inserts a row in C*, updates the row and then deletes
    all the rows using BaseCQLStatements

    @test_category data_types:object_mapper
    """
    partition = uuid4()
    cluster = 1
    self._insert_statement(partition, cluster)

    # Verifying update statement
    where = [WhereClause('partition', EqualsOperator(), partition),
             WhereClause('cluster', EqualsOperator(), cluster)]

    st = UpdateStatement(self.table_name, where=where)
    st.add_assignment(Column(db_field='count'), 2)
    st.add_assignment(Column(db_field='text'), "text_for_db_update")
    st.add_assignment(Column(db_field='text_set'), set(("foo_update", "bar_update")))
    st.add_assignment(Column(db_field='text_list'), ["foo_update", "bar_update"])
    st.add_assignment(Column(db_field='text_map'), {"foo": '3', "bar": '4'})

    execute(st)
    self._verify_statement(st)

    # Verifying delete statement
    execute(DeleteStatement(self.table_name, where=where))
    self.assertEqual(TestQueryUpdateModel.objects.count(), 0)
def get_avg_x_speed():
    # with UDF
    connection.execute(CREATE_GET_X_SPEED_FUNCTION_QUERY.format(DB_NAME))
    return list(
        connection.execute(
            AVG_X_SPEED_QUERY.format(DB_NAME)).current_rows[0].values())[0]
def create_materialized_view(model):
    """The materialized view model must define ``base_model`` and must include
    all of the base model's primary keys in its own primary key."""
    table_cql = _get_create_table(model)
    """ cql example:
    CREATE TABLE rolltrek.course2 ("course_id" uuid , "user_id" uuid , "created_at" timestamp , PRIMARY KEY (("course_id"), "user_id")) WITH CLUSTERING ORDER BY ("user_id" ASC)
    """
    m = re.match(r'CREATE TABLE (\S+) \((.*?)(PRIMARY KEY.*$)', table_cql)
    name = m[1]
    end = m[3].replace(')) WITH CLUSTERING', ') WITH CLUSTERING')
    # resolve column names
    t = m[2]
    columnNames = []
    # each name may be wrapped in quotes
    for t2 in t.split(','):
        t2 = t2.strip()
        if t2:
            columnNames.append(t2.split(' ')[0])
    # build the cql
    select_part = ', '.join(columnNames)
    where_part = ' AND '.join(
        ['%s IS NOT NULL' % (name) for name in columnNames])
    base_table_name = model.base_model.column_family_name()
    cql = """
        CREATE MATERIALIZED VIEW %s AS
        SELECT %s
        FROM %s
        WHERE %s
        %s;
    """ % (name, select_part, base_table_name, where_part, end)
    connection.execute(cql)
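# For the example table in the docstring above, the CQL this helper emits
# would look roughly like the following (reconstructed by hand; the base
# table name is an assumption):
#
#   CREATE MATERIALIZED VIEW rolltrek.course2 AS
#   SELECT "course_id", "user_id", "created_at"
#   FROM rolltrek.course
#   WHERE "course_id" IS NOT NULL AND "user_id" IS NOT NULL AND "created_at" IS NOT NULL
#   PRIMARY KEY (("course_id"), "user_id") WITH CLUSTERING ORDER BY ("user_id" ASC);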
def __create_keyspace(name, durable_writes, strategy_class, strategy_options, connection=None):
    cluster = get_cluster(connection)

    if name not in cluster.metadata.keyspaces:
        log.info(format_log_context("Creating keyspace %s", connection=connection), name)
        ks_meta = metadata.KeyspaceMetadata(name, durable_writes, strategy_class, strategy_options)
        execute(ks_meta.as_cql_query(), connection=connection)
    else:
        log.info(format_log_context("Not creating keyspace %s because it already exists", connection=connection), name)
def sync(self, alias):
    super(Command, self).sync(alias)
    if hasattr(settings, 'EPO_NORMAL_CASSANDRA_ACCESS_SETTINGS'):
        user = settings.EPO_NORMAL_CASSANDRA_ACCESS_SETTINGS['USER']
        connection = connections[alias]
        keyspace = connection.settings_dict['NAME']
        execute(GRANT_PATTERN % ('select', keyspace, user))
        execute(GRANT_PATTERN % ('modify', keyspace, user))
def _update_options(model, connection=None):
    """Updates the table options for the given model if necessary.

    :param model: The model to update.
    :param connection: Name of the connection to use

    :return: `True`, if the options were modified in Cassandra,
        `False` otherwise.
    :rtype: bool
    """
    ks_name = model._get_keyspace()
    msg = format_log_context("Checking %s for option differences", keyspace=ks_name, connection=connection)
    log.debug(msg, model)

    model_options = model.__options__ or {}

    table_meta = _get_table_metadata(model, connection=connection)
    # go to CQL string first to normalize meta from different versions
    existing_option_strings = set(
        table_meta._make_option_strings(table_meta.options))
    existing_options = _options_map_from_strings(existing_option_strings)
    model_option_strings = metadata.TableMetadataV3._make_option_strings(model_options)
    model_options = _options_map_from_strings(model_option_strings)

    update_options = {}
    for name, value in model_options.items():
        try:
            existing_value = existing_options[name]
        except KeyError:
            msg = format_log_context(
                "Invalid table option: '%s'; known options: %s",
                keyspace=ks_name, connection=connection)
            raise KeyError(msg % (name, existing_options.keys()))
        if isinstance(existing_value, six.string_types):
            if value != existing_value:
                update_options[name] = value
        else:
            try:
                for k, v in value.items():
                    if existing_value[k] != v:
                        update_options[name] = value
                        break
            except KeyError:
                update_options[name] = value

    if update_options:
        options = ' AND '.join(
            metadata.TableMetadataV3._make_option_strings(update_options))
        query = "ALTER TABLE {0} WITH {1}".format(model.column_family_name(), options)
        execute(query, connection=connection)
        return True

    return False
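# _update_options compares the model's __options__ map against the live table
# metadata. For reference, a model declaring options this helper would pick up
# might look like the following minimal sketch (option values illustrative):
from cassandra.cqlengine import columns
from cassandra.cqlengine.models import Model

class ExampleTable(Model):
    __options__ = {
        'default_time_to_live': 3600,
        'compaction': {'class': 'SizeTieredCompactionStrategy',
                       'max_threshold': '64'},
    }
    id = columns.UUID(primary_key=True)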
def _sync_type(ks_name, type_model, omit_subtypes=None, connection=None):
    syncd_sub_types = omit_subtypes or set()
    for field in type_model._fields.values():
        udts = []
        columns.resolve_udts(field, udts)
        for udt in [u for u in udts if u not in syncd_sub_types]:
            _sync_type(ks_name, udt, syncd_sub_types, connection=connection)
            syncd_sub_types.add(udt)

    type_name = type_model.type_name()
    type_name_qualified = "%s.%s" % (ks_name, type_name)

    cluster = get_cluster(connection)

    keyspace = cluster.metadata.keyspaces[ks_name]
    defined_types = keyspace.user_types

    if type_name not in defined_types:
        log.debug(format_log_context("sync_type creating new type %s", keyspace=ks_name, connection=connection), type_name_qualified)
        cql = get_create_type(type_model, ks_name)
        execute(cql, connection=connection)
        cluster.refresh_user_type_metadata(ks_name, type_name)
        type_model.register_for_keyspace(ks_name, connection=connection)
    else:
        type_meta = defined_types[type_name]
        defined_fields = type_meta.field_names
        model_fields = set()
        for field in type_model._fields.values():
            model_fields.add(field.db_field_name)
            if field.db_field_name not in defined_fields:
                execute("ALTER TYPE {0} ADD {1}".format(type_name_qualified, field.get_column_def()), connection=connection)
            else:
                field_type = type_meta.field_types[defined_fields.index(field.db_field_name)]
                if field_type != field.db_type:
                    msg = format_log_context(
                        'Existing user type {0} has field "{1}" with a type ({2}) differing from the model user type ({3}).'
                        ' UserType should be updated.', keyspace=ks_name, connection=connection)
                    msg = msg.format(type_name_qualified, field.db_field_name, field_type, field.db_type)
                    warnings.warn(msg)
                    log.warning(msg)

        type_model.register_for_keyspace(ks_name, connection=connection)

        if len(defined_fields) == len(model_fields):
            log.info(
                format_log_context("Type %s did not require synchronization", keyspace=ks_name, connection=connection),
                type_name_qualified)
            return

        db_fields_not_in_model = model_fields.symmetric_difference(defined_fields)
        if db_fields_not_in_model:
            msg = format_log_context("Type %s has fields not referenced by model: %s", keyspace=ks_name, connection=connection)
            log.info(msg, type_name_qualified, db_fields_not_in_model)
def create_keyspace(name, strategy_class, replication_factor, durable_writes=True, **replication_values):
    """
    *Deprecated - use :func:`create_keyspace_simple` or :func:`create_keyspace_network_topology` instead*

    Creates a keyspace

    If the keyspace already exists, it will not be modified.

    **This function should be used with caution, especially in production environments.
    Take care to execute schema modifications in a single context (i.e. not concurrently with other clients).**

    *There are plans to guard schema-modifying functions with an environment-driven conditional.*

    :param str name: name of keyspace to create
    :param str strategy_class: keyspace replication strategy class (:attr:`~.SimpleStrategy` or :attr:`~.NetworkTopologyStrategy`)
    :param int replication_factor: keyspace replication factor, used with :attr:`~.SimpleStrategy`
    :param bool durable_writes: Write log is bypassed if set to False
    :param \*\*replication_values: Additional values to add to the replication options map
    """
    if not _allow_schema_modification():
        return

    msg = "Deprecated. Use create_keyspace_simple or create_keyspace_network_topology instead"
    warnings.warn(msg, DeprecationWarning)
    log.warning(msg)

    cluster = get_cluster()

    if name not in cluster.metadata.keyspaces:
        # try the 1.2 method
        replication_map = {
            'class': strategy_class,
            'replication_factor': replication_factor
        }
        replication_map.update(replication_values)
        if strategy_class.lower() != 'simplestrategy':
            # Although the Cassandra documentation states for `replication_factor`
            # that it is "Required if class is SimpleStrategy; otherwise,
            # not used." we get an error if it is present.
            replication_map.pop('replication_factor', None)

        query = """
        CREATE KEYSPACE {}
        WITH REPLICATION = {}
        """.format(name, json.dumps(replication_map).replace('"', "'"))

        if strategy_class != 'SimpleStrategy':
            query += " AND DURABLE_WRITES = {}".format(
                'true' if durable_writes else 'false')

        execute(query)
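# The replacement helpers named in the deprecation notice live in cqlengine's
# management module; a minimal sketch of their use (keyspace names and
# replication settings are illustrative):
from cassandra.cqlengine import management

management.create_keyspace_simple('demo_ks', replication_factor=1)
management.create_keyspace_network_topology('demo_ks_nts', {'DC1': 3})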
def remove(timestamp, entity_name, dicted=False):
    deleted = connection.execute(
        "select * from {0}.{3} where date='{1}' and time='{2}';".format(
            DB_NAME, timestamp.strftime(DATE_PATTERN),
            timestamp.strftime(TIME_PATTERN), entity_name))[0]
    connection.execute(
        "delete from {0}.{3} where date='{1}' and time='{2}';".format(
            DB_NAME, timestamp.strftime(DATE_PATTERN),
            timestamp.strftime(TIME_PATTERN), entity_name))
    if not dicted:
        return namedtuple('Struct', deleted.keys())(*deleted.values())
    return deleted
def index():
    execute(
        """
        INSERT INTO mykeyspa.users (name, credits, user_id)
        VALUES (%s, %s, %s)
        """,
        ("John O'Reilly", 42, 123)
    )
    return 'Hello Bunny my precious!!'
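# The %s markers above are driver-level bind parameters, not Python string
# formatting: the values tuple travels separately, so quoting (e.g. the
# apostrophe in "John O'Reilly") is handled safely. The same insert via a
# prepared statement, assuming a connected driver `session` (keyspace and
# table are the hypothetical ones from the snippet above):
prepared = session.prepare(
    "INSERT INTO mykeyspa.users (name, credits, user_id) VALUES (?, ?, ?)")
session.execute(prepared, ("John O'Reilly", 42, 123))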
def _create_keyspace(name, durable_writes, strategy_class, strategy_options):
    if not _allow_schema_modification():
        return

    cluster = get_cluster()

    if name not in cluster.metadata.keyspaces:
        log.info("Creating keyspace %s", name)
        ks_meta = metadata.KeyspaceMetadata(name, durable_writes, strategy_class, strategy_options)
        execute(ks_meta.as_cql_query())
    else:
        log.info("Not creating keyspace %s because it already exists", name)
def _sync_type(ks_name, type_model, omit_subtypes=None, connection=None):
    syncd_sub_types = omit_subtypes or set()
    for field in type_model._fields.values():
        udts = []
        columns.resolve_udts(field, udts)
        for udt in [u for u in udts if u not in syncd_sub_types]:
            _sync_type(ks_name, udt, syncd_sub_types, connection=connection)
            syncd_sub_types.add(udt)

    type_name = type_model.type_name()
    type_name_qualified = "%s.%s" % (ks_name, type_name)

    cluster = get_cluster(connection)

    keyspace = cluster.metadata.keyspaces[ks_name]
    defined_types = keyspace.user_types

    if type_name not in defined_types:
        log.debug(format_log_context("sync_type creating new type %s", keyspace=ks_name, connection=connection), type_name_qualified)
        cql = get_create_type(type_model, ks_name)
        execute(cql, connection=connection)
        cluster.refresh_user_type_metadata(ks_name, type_name)
        type_model.register_for_keyspace(ks_name, connection=connection)
    else:
        type_meta = defined_types[type_name]
        defined_fields = type_meta.field_names
        model_fields = set()
        for field in type_model._fields.values():
            model_fields.add(field.db_field_name)
            if field.db_field_name not in defined_fields:
                execute("ALTER TYPE {0} ADD {1}".format(type_name_qualified, field.get_column_def()), connection=connection)
            else:
                field_type = type_meta.field_types[defined_fields.index(field.db_field_name)]
                if field_type != field.db_type:
                    msg = format_log_context('Existing user type {0} has field "{1}" with a type ({2}) differing from the model user type ({3}).'
                                             ' UserType should be updated.', keyspace=ks_name, connection=connection)
                    msg = msg.format(type_name_qualified, field.db_field_name, field_type, field.db_type)
                    warnings.warn(msg)
                    log.warning(msg)

        type_model.register_for_keyspace(ks_name, connection=connection)

        if len(defined_fields) == len(model_fields):
            log.info(format_log_context("Type %s did not require synchronization", keyspace=ks_name, connection=connection), type_name_qualified)
            return

        db_fields_not_in_model = model_fields.symmetric_difference(defined_fields)
        if db_fields_not_in_model:
            msg = format_log_context("Type %s has fields not referenced by model: %s", keyspace=ks_name, connection=connection)
            log.info(msg, type_name_qualified, db_fields_not_in_model)
def _insert_statement(self, partition, cluster):
    # Verifying insert statement
    st = InsertStatement(self.table_name)
    st.add_assignment(Column(db_field='partition'), partition)
    st.add_assignment(Column(db_field='cluster'), cluster)

    st.add_assignment(Column(db_field='count'), 1)
    st.add_assignment(Column(db_field='text'), self.text)
    st.add_assignment(Column(db_field='text_set'), set(("foo", "bar")))
    st.add_assignment(Column(db_field='text_list'), ["foo", "bar"])
    st.add_assignment(Column(db_field='text_map'), {"foo": '1', "bar": '2'})

    execute(st)
    self._verify_statement(st)
def create_keyspace(name, strategy_class, replication_factor, durable_writes=True, **replication_values):
    """
    *Deprecated - use :func:`create_keyspace_simple` or :func:`create_keyspace_network_topology` instead*

    Creates a keyspace

    If the keyspace already exists, it will not be modified.

    **This function should be used with caution, especially in production environments.
    Take care to execute schema modifications in a single context (i.e. not concurrently with other clients).**

    *There are plans to guard schema-modifying functions with an environment-driven conditional.*

    :param str name: name of keyspace to create
    :param str strategy_class: keyspace replication strategy class (:attr:`~.SimpleStrategy` or :attr:`~.NetworkTopologyStrategy`)
    :param int replication_factor: keyspace replication factor, used with :attr:`~.SimpleStrategy`
    :param bool durable_writes: Write log is bypassed if set to False
    :param \*\*replication_values: Additional values to add to the replication options map
    """
    if not _allow_schema_modification():
        return

    msg = "Deprecated. Use create_keyspace_simple or create_keyspace_network_topology instead"
    warnings.warn(msg, DeprecationWarning)
    log.warning(msg)

    cluster = get_cluster()

    if name not in cluster.metadata.keyspaces:
        # try the 1.2 method
        replication_map = {
            'class': strategy_class,
            'replication_factor': replication_factor
        }
        replication_map.update(replication_values)
        if strategy_class.lower() != 'simplestrategy':
            # Although the Cassandra documentation states for `replication_factor`
            # that it is "Required if class is SimpleStrategy; otherwise,
            # not used." we get an error if it is present.
            replication_map.pop('replication_factor', None)

        query = """
        CREATE KEYSPACE {}
        WITH REPLICATION = {}
        """.format(metadata.protect_name(name), json.dumps(replication_map).replace('"', "'"))

        if strategy_class != 'SimpleStrategy':
            query += " AND DURABLE_WRITES = {}".format('true' if durable_writes else 'false')

        execute(query)
def update_compaction(model):
    """Updates the compaction options for the given model if necessary.

    :param model: The model to update.

    :return: `True`, if the compaction options were modified in Cassandra,
        `False` otherwise.
    :rtype: bool
    """
    log.debug("Checking %s for compaction differences", model)
    table = get_table_settings(model)

    existing_options = table.options.copy()
    existing_compaction_strategy = existing_options[
        'compaction_strategy_class']
    existing_options = json.loads(
        existing_options['compaction_strategy_options'])

    desired_options = get_compaction_options(model)
    desired_compact_strategy = desired_options.get(
        'class', SizeTieredCompactionStrategy)
    desired_options.pop('class', None)

    do_update = False

    if desired_compact_strategy not in existing_compaction_strategy:
        do_update = True

    for k, v in desired_options.items():
        val = existing_options.pop(k, None)
        if val != v:
            do_update = True

    # check compaction_strategy_options
    if do_update:
        options = get_compaction_options(model)
        # jsonify
        options = json.dumps(options).replace('"', "'")
        cf_name = model.column_family_name()
        query = "ALTER TABLE {0} with compaction = {1}".format(
            cf_name, options)
        execute(query)
        return True

    return False
def _drop_table(model):
    if not _allow_schema_modification():
        return

    # don't try to delete non-existent tables
    meta = get_cluster().metadata
    ks_name = model._get_keyspace()
    raw_cf_name = model._raw_column_family_name()

    try:
        meta.keyspaces[ks_name].tables[raw_cf_name]
        execute('DROP TABLE {0};'.format(model.column_family_name()))
    except KeyError:
        pass
def _update_options(model, connection=None):
    """Updates the table options for the given model if necessary.

    :param model: The model to update.
    :param connection: Name of the connection to use

    :return: `True`, if the options were modified in Cassandra,
        `False` otherwise.
    :rtype: bool
    """
    ks_name = model._get_keyspace()
    msg = format_log_context("Checking %s for option differences", keyspace=ks_name, connection=connection)
    log.debug(msg, model)

    model_options = model.__options__ or {}

    table_meta = _get_table_metadata(model, connection=connection)
    # go to CQL string first to normalize meta from different versions
    existing_option_strings = set(table_meta._make_option_strings(table_meta.options))
    existing_options = _options_map_from_strings(existing_option_strings)
    model_option_strings = metadata.TableMetadataV3._make_option_strings(model_options)
    model_options = _options_map_from_strings(model_option_strings)

    update_options = {}
    for name, value in model_options.items():
        try:
            existing_value = existing_options[name]
        except KeyError:
            msg = format_log_context("Invalid table option: '%s'; known options: %s", keyspace=ks_name, connection=connection)
            raise KeyError(msg % (name, existing_options.keys()))
        if isinstance(existing_value, six.string_types):
            if value != existing_value:
                update_options[name] = value
        else:
            try:
                for k, v in value.items():
                    if existing_value[k] != v:
                        update_options[name] = value
                        break
            except KeyError:
                update_options[name] = value

    if update_options:
        options = ' AND '.join(metadata.TableMetadataV3._make_option_strings(update_options))
        query = "ALTER TABLE {0} WITH {1}".format(model.column_family_name(), options)
        execute(query, connection=connection)
        return True

    return False
def search_data_cassandra(hit, admin_code):
    """Look up the geo_name rows matching a target zone in Cassandra.

    Args:
        hit: the search target used to resolve the zone.
        admin_code: the administrative code level to match on.

    Returns:
        tuple: (matching geo_name rows, their hierarchy), or an error
        message when the admin_code does not exist.
    """
    # look up the geonameids linked to the target in Cassandra
    list_geonameid = search_zone(admin_code, hit, "geonames")
    # look up the geonames for the indicated zone
    list_geonameid_hierarchy = search_zone("admin4_code", hit, "geonames")
    # retrieve the geonames sharing the same postal code, since not every
    # geoname is linked to the hierarchy table
    hierarchy = search_data_hierarchy_fr(list_geonameid_hierarchy)
    if list_geonameid == -1:
        return "The admin_code does not exist"
    list_geonameid = list_to_str(list_geonameid)
    list_geoname = []
    query = 'SELECT * FROM cassandra.geo_name WHERE geonameid IN (' + list_geonameid + ')'
    rows = connection.execute(query)
    for row in rows:
        list_geoname.append(row)
    return list_geoname, hierarchy
def fill(entity_name, base, batch_size, mongo_ids=[], neo_ids=[], strings=[]):
    data = json.load(
        open(
            convert(
                os.environ['SPACE_SHIP_HOME'] +
                '/generation/dummyMarket/cassandra json/{0}.json'.format(base),
                base,
                mongo_ids=mongo_ids,
                neo_ids=neo_ids,
                strings=strings)))
    # ceil division, so the last (possibly short) chunk gets its own batch
    # and no empty trailing batch is executed
    number_of_batches = (len(data["data"]) + batch_size - 1) // batch_size
    for i in range(number_of_batches):
        query = "BEGIN BATCH {0} APPLY BATCH;".format(' '.join([
            "insert into {0}.{1} json '{2}';".format(
                DB_NAME, entity_name, json.dumps(item))
            for item in data["data"][i * batch_size:(i + 1) * batch_size]
        ]))
        print(connection.execute(query))
def update(table, params, dicted=False):
    where = {}
    update = {}
    for param in params:
        # keys prefixed with 'set_' are assignments, the rest are filters
        if param.startswith('set_'):
            update[param[len('set_'):]] = params[param]
        else:
            where[param] = params[param]
    select_result = select(table, where, dicted=True)
    parsed_update_params = parse_params(update, ', ')
    result = connection.execute('BEGIN BATCH ' + ' '.join([
        'update {0}.{1} set {3} where {2};'.format(
            DB_NAME, table, extract_keys(item, ['date', 'time']),
            parsed_update_params) for item in select_result
    ]) + ' APPLY BATCH;').current_rows
    if not dicted:
        return [
            namedtuple('Struct', item.keys())(*item.values())
            for item in result
        ]
    return result
def update(table_name, columns_getter, columns_setter):
    columns_filter = ', '.join([item[0] for item in columns_getter])
    values_filter = ' and '.join([
        item[0] + ' = ' + str(item[1])
        if isinstance(item[1], (int, float)) else
        item[0] + ' = \'' + str(item[1]) + '\''
        for item in (item for item in columns_getter if len(item) > 1)
    ])
    for column in columns_setter:
        if len(column) > 1:
            try:
                column[1] = int(column[1])
            except ValueError:
                try:
                    column[1] = float(column[1])
                except ValueError:
                    pass
    values_setter = ', '.join([
        item[0] + ' = ' + str(item[1])
        if isinstance(item[1], (int, float)) else
        item[0] + ' = \'' + str(item[1]) + '\''
        for item in (item for item in columns_setter if len(item) > 1)
    ])
    if len(values_filter) > 0:
        values_filter = 'where ' + values_filter
    if len(values_setter) > 0:
        values_setter = 'set ' + values_setter
    # CQL requires SET before WHERE, so log the query in the order it is executed
    print('update {0}.{1} {3} {2};'.format(DB_NAME, table_name, values_filter, values_setter))
    return connection.execute('update {0}.{1} {3} {2};'.format(
        DB_NAME, table_name, values_filter, values_setter)).current_rows
def _execute(self, q):
    if self._batch:
        return self._batch.add_query(q)
    else:
        tmp = connection.execute(q, consistency_level=self._consistency, timeout=self._timeout)
        if self._if_not_exists or self._if_exists or self._conditional:
            check_applied(tmp)
        return tmp
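# check_applied above inspects the result of a conditional (IF ...) write:
# Cassandra returns a boolean "[applied]" column for lightweight transactions.
# A minimal sketch with the plain driver (session, keyspace and table are
# assumptions for illustration):
result = session.execute(
    "INSERT INTO ks.users (user_id, name) VALUES (%s, %s) IF NOT EXISTS",
    (123, 'alice'))
if not result.was_applied:  # driver 3.x surfaces the [applied] column here
    print("row already existed; the conditional write was rejected")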
def _verify_statement(self, original):
    st = SelectStatement(self.table_name)
    result = execute(st)
    response = result[0]

    for assignment in original.assignments:
        self.assertEqual(response[assignment.field], assignment.value)
    self.assertEqual(len(response), 7)
def _execute(self, q):
    if self._batch:
        return self._batch.add_query(q)
    else:
        result = connection.execute(q, consistency_level=self._consistency, timeout=self._timeout)
        if self._if_not_exists or self._if_exists or self._transaction:
            check_applied(result)
        return result
def _execute(self, q):
    if self._batch:
        return self._batch.add_query(q)
    else:
        tmp = connection.execute(q, consistency_level=self._consistency, timeout=self._timeout)
        if self._if_not_exists or self._transaction:
            check_applied(tmp)
        return tmp
def drop_keyspace(name):
    """
    Drops a keyspace, if it exists.

    *There are plans to guard schema-modifying functions with an environment-driven conditional.*

    **This function should be used with caution, especially in production environments.
    Take care to execute schema modifications in a single context (i.e. not concurrently with other clients).**

    :param str name: name of keyspace to drop
    """
    if not _allow_schema_modification():
        return

    cluster = get_cluster()
    if name in cluster.metadata.keyspaces:
        execute("DROP KEYSPACE {}".format(metadata.protect_name(name)))
def drop_keyspace(name):
    """
    Drops a keyspace, if it exists.

    *There are plans to guard schema-modifying functions with an environment-driven conditional.*

    **This function should be used with caution, especially in production environments.
    Take care to execute schema modifications in a single context (i.e. not concurrently with other clients).**

    :param str name: name of keyspace to drop
    """
    if not _allow_schema_modification():
        return

    cluster = get_cluster()
    if name in cluster.metadata.keyspaces:
        execute("DROP KEYSPACE {0}".format(metadata.protect_name(name)))
def _execute(self, q):
    if self._batch:
        return self._batch.add_query(q)
    else:
        result = connection.execute(q, consistency_level=self._consistency, timeout=self._timeout)
        if self._transaction:
            check_applied(result)
        return result
def _sync_type(ks_name, type_model, omit_subtypes=None):
    syncd_sub_types = omit_subtypes or set()
    for field in type_model._fields.values():
        udts = []
        columns.resolve_udts(field, udts)
        for udt in [u for u in udts if u not in syncd_sub_types]:
            _sync_type(ks_name, udt, syncd_sub_types)
            syncd_sub_types.add(udt)

    type_name = type_model.type_name()
    type_name_qualified = "%s.%s" % (ks_name, type_name)

    cluster = get_cluster()

    keyspace = cluster.metadata.keyspaces[ks_name]
    defined_types = keyspace.user_types

    if type_name not in defined_types:
        log.debug("sync_type creating new type %s", type_name_qualified)
        cql = get_create_type(type_model, ks_name)
        execute(cql)
        cluster.refresh_user_type_metadata(ks_name, type_name)
        type_model.register_for_keyspace(ks_name)
    else:
        defined_fields = defined_types[type_name].field_names
        model_fields = set()
        for field in type_model._fields.values():
            model_fields.add(field.db_field_name)
            if field.db_field_name not in defined_fields:
                execute("ALTER TYPE {0} ADD {1}".format(
                    type_name_qualified, field.get_column_def()))

        type_model.register_for_keyspace(ks_name)

        if len(defined_fields) == len(model_fields):
            log.info("Type %s did not require synchronization", type_name_qualified)
            return

        db_fields_not_in_model = model_fields.symmetric_difference(
            defined_fields)
        if db_fields_not_in_model:
            log.info("Type %s has fields not referenced by model: %s",
                     type_name_qualified, db_fields_not_in_model)
def update_compaction(model):
    """Updates the compaction options for the given model if necessary.

    :param model: The model to update.

    :return: `True`, if the compaction options were modified in Cassandra,
        `False` otherwise.
    :rtype: bool
    """
    log.debug("Checking %s for compaction differences", model)
    table = get_table_settings(model)

    existing_options = table.options.copy()
    existing_compaction_strategy = existing_options['compaction_strategy_class']
    existing_options = json.loads(existing_options['compaction_strategy_options'])

    desired_options = get_compaction_options(model)
    desired_compact_strategy = desired_options.get('class', SizeTieredCompactionStrategy)
    desired_options.pop('class', None)

    do_update = False

    if desired_compact_strategy not in existing_compaction_strategy:
        do_update = True

    for k, v in desired_options.items():
        val = existing_options.pop(k, None)
        if val != v:
            do_update = True

    # check compaction_strategy_options
    if do_update:
        options = get_compaction_options(model)
        # jsonify
        options = json.dumps(options).replace('"', "'")
        cf_name = model.column_family_name()
        query = "ALTER TABLE {} with compaction = {}".format(cf_name, options)
        execute(query)
        return True

    return False
def _execute_statement(model, statement, consistency_level, timeout):
    params = statement.get_context()
    s = SimpleStatement(str(statement), consistency_level=consistency_level, fetch_size=statement.fetch_size)
    if model._partition_key_index:
        key_values = statement.partition_key_values(model._partition_key_index)
        if not any(v is None for v in key_values):
            parts = model._routing_key_from_values(key_values, connection.get_cluster().protocol_version)
            s.routing_key = parts
            s.keyspace = model._get_keyspace()
    return connection.execute(s, params, timeout=timeout)
def _update_options(model):
    """Updates the table options for the given model if necessary.

    :param model: The model to update.

    :return: `True`, if the options were modified in Cassandra,
        `False` otherwise.
    :rtype: bool
    """
    log.debug("Checking %s for option differences", model)
    model_options = model.__options__ or {}

    table_meta = _get_table_metadata(model)
    # go to CQL string first to normalize meta from different versions
    existing_option_strings = set(table_meta._make_option_strings(table_meta.options))
    existing_options = _options_map_from_strings(existing_option_strings)
    model_option_strings = metadata.TableMetadataV3._make_option_strings(model_options)
    model_options = _options_map_from_strings(model_option_strings)

    update_options = {}
    for name, value in model_options.items():
        existing_value = existing_options[name]
        if isinstance(existing_value, six.string_types):
            if value != existing_value:
                update_options[name] = value
        else:
            try:
                for k, v in value.items():
                    if existing_value[k] != v:
                        update_options[name] = value
                        break
            except KeyError:
                update_options[name] = value

    if update_options:
        options = ' AND '.join(metadata.TableMetadataV3._make_option_strings(update_options))
        query = "ALTER TABLE {0} WITH {1}".format(model.column_family_name(), options)
        execute(query)
        return True

    return False
def _sync_type(ks_name, type_model, omit_subtypes=None):
    syncd_sub_types = omit_subtypes or set()
    for field in type_model._fields.values():
        udts = []
        columns.resolve_udts(field, udts)
        for udt in [u for u in udts if u not in syncd_sub_types]:
            _sync_type(ks_name, udt, syncd_sub_types)
            syncd_sub_types.add(udt)

    type_name = type_model.type_name()
    type_name_qualified = "%s.%s" % (ks_name, type_name)

    cluster = get_cluster()

    keyspace = cluster.metadata.keyspaces[ks_name]
    defined_types = keyspace.user_types

    if type_name not in defined_types:
        log.debug("sync_type creating new type %s", type_name_qualified)
        cql = get_create_type(type_model, ks_name)
        execute(cql)
        cluster.refresh_user_type_metadata(ks_name, type_name)
        type_model.register_for_keyspace(ks_name)
    else:
        defined_fields = defined_types[type_name].field_names
        model_fields = set()
        for field in type_model._fields.values():
            model_fields.add(field.db_field_name)
            if field.db_field_name not in defined_fields:
                execute("ALTER TYPE {} ADD {}".format(type_name_qualified, field.get_column_def()))

        type_model.register_for_keyspace(ks_name)

        if len(defined_fields) == len(model_fields):
            log.info("Type %s did not require synchronization", type_name_qualified)
            return

        db_fields_not_in_model = model_fields.symmetric_difference(defined_fields)
        if db_fields_not_in_model:
            log.info("Type %s has fields not referenced by model: %s",
                     type_name_qualified, db_fields_not_in_model)
def sync_materialized_view(mv):
    viewName = getattr(mv, 'view_name', mv.__name__)
    partitionKeys = mv.partition_keys
    primaryKeys = getattr(mv, 'primary_keys', None)
    basePrimaryKeys = getattr(mv, 'base_primary_keys', ['id'])
    baseTableName = getattr(mv, 'base_table_name', None)
    if not baseTableName:
        baseTableName = viewName.split('_by_')[0]
    cols = mv._defined_columns  # key is col name
    select = ','.join(colName for colName in cols)
    where = ' AND '.join('%s IS NOT NULL' % (key) for key in partitionKeys)
    primary = ['(%s)' % (','.join(partitionKeys))]
    if primaryKeys:
        primary.append(','.join(primaryKeys))
    primary.append(','.join(basePrimaryKeys))
    from cassandra.cqlengine import models
    keyspace = models.DEFAULT_KEYSPACE
    connection.execute('use %s;' % (keyspace))
    connection.execute('DROP MATERIALIZED VIEW IF EXISTS %s;' % (viewName))
    cql = """
        CREATE MATERIALIZED VIEW %s AS
        SELECT %s
        FROM %s
        WHERE %s
        PRIMARY KEY (%s);
    """ % (viewName, select, baseTableName, where, ','.join(primary))
    connection.execute(cql)
def select(table_name, columns):
    columns_filter = ', '.join([item[0] for item in columns])
    values_filter = ' and '.join([
        item[0] + ' = ' + str(item[1])
        if isinstance(item[1], (int, float)) else
        item[0] + ' = \'' + str(item[1]) + '\''
        for item in (item for item in columns if len(item) > 1)
    ])
    if len(values_filter) > 0:
        values_filter = 'where ' + values_filter
    print('select {2} from {0}.{1} {3} allow filtering;'.format(
        DB_NAME, table_name, columns_filter, values_filter))
    return connection.execute('select {2} from {0}.{1} {3} allow filtering;'.format(
        DB_NAME, table_name, columns_filter, values_filter)).current_rows
def select(table, params, dicted=False):
    parsed_params = parse_params(params, ' and ')
    result = connection.execute('select * from {0}.{1} {2};'.format(
        DB_NAME, table,
        'where {0} allow filtering'.format(parsed_params)
        if len(parsed_params) else '')).current_rows
    if not dicted:
        return [
            namedtuple('Struct', item.keys())(*item.values())
            for item in result
        ]
    return result
def fill(entity_name, base, batch_size, mongo_ids = [], neo_ids = [], strings = []):
    data = json.load(open(convert(os.environ['SPACE_SHIP_HOME'] + '/generation/dummyMarket/cassandra json/{0}.json'.format(base),
                                  base, mongo_ids = mongo_ids, neo_ids = neo_ids, strings = strings)))
    # ceil division avoids executing an empty trailing batch
    number_of_batches = (len(data["data"]) + batch_size - 1) // batch_size
    for i in range(number_of_batches):
        query = "BEGIN BATCH {0} APPLY BATCH;".format(' '.join(
            ["insert into {0}.{1} json '{2}';".format(DB_NAME, entity_name, json.dumps(item))
             for item in data["data"][i*batch_size : (i + 1)*batch_size]]))
        print(connection.execute(query))
def drop_table(model):
    """
    Drops the table indicated by the model, if it exists.

    **This function should be used with caution, especially in production environments.
    Take care to execute schema modifications in a single context (i.e. not concurrently with other clients).**

    *There are plans to guard schema-modifying functions with an environment-driven conditional.*
    """
    if not _allow_schema_modification():
        return

    # don't try to delete non-existent tables
    meta = get_cluster().metadata
    ks_name = model._get_keyspace()
    raw_cf_name = model._raw_column_family_name()

    try:
        meta.keyspaces[ks_name].tables[raw_cf_name]
        execute('DROP TABLE {0};'.format(model.column_family_name()))
    except KeyError:
        pass
def drop_table(model):
    """
    Drops the table indicated by the model, if it exists.

    **This function should be used with caution, especially in production environments.
    Take care to execute schema modifications in a single context (i.e. not concurrently with other clients).**

    *There are plans to guard schema-modifying functions with an environment-driven conditional.*
    """
    if not _allow_schema_modification():
        return

    # don't try to delete non-existent tables
    meta = get_cluster().metadata
    ks_name = model._get_keyspace()
    raw_cf_name = model._raw_column_family_name()

    try:
        meta.keyspaces[ks_name].tables[raw_cf_name]
        execute('DROP TABLE {};'.format(model.column_family_name()))
    except KeyError:
        pass
def get_fields(model):
    # returns all fields that aren't part of the PK
    ks_name = model._get_keyspace()
    col_family = model._raw_column_family_name()
    field_types = ['regular', 'static']
    query = "select * from system.schema_columns where keyspace_name = %s and columnfamily_name = %s"
    tmp = execute(query, [ks_name, col_family])

    # Tables containing only primary keys do not appear to create
    # any entries in system.schema_columns, as only non-primary-key attributes
    # appear to be inserted into the schema_columns table
    try:
        return [Field(x['column_name'], x['validator']) for x in tmp if x['type'] in field_types]
    except KeyError:
        return [Field(x['column_name'], x['validator']) for x in tmp]
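# Note: system.schema_columns queried above is the pre-3.0 schema table; on
# Cassandra 3.0+ the same metadata lives in system_schema.columns, with
# slightly different column names. A sketch of the equivalent lookup under
# that assumption:
def get_fields_v3_sketch(model):
    rows = execute(
        "select column_name, type, kind from system_schema.columns "
        "where keyspace_name = %s and table_name = %s",
        [model._get_keyspace(), model._raw_column_family_name()])
    # 'kind' distinguishes partition_key / clustering / regular / static
    return [Field(x['column_name'], x['type'])
            for x in rows if x['kind'] in ('regular', 'static')]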
def select(table_name, columns):
    columns_filter = ', '.join([item[0] for item in columns])
    values_filter = ' and '.join([
        item[0] + ' = ' + str(item[1])
        if isinstance(item[1], (int, float)) else
        item[0] + ' = \'' + str(item[1]) + '\''
        for item in (item for item in columns if len(item) > 1)
    ])
    if len(values_filter) > 0:
        values_filter = 'where ' + values_filter
    print('select {2} from {0}.{1} {3} allow filtering;'.format(
        DB_NAME, table_name, columns_filter, values_filter))
    return connection.execute(
        'select {2} from {0}.{1} {3} allow filtering;'.format(
            DB_NAME, table_name, columns_filter, values_filter)).current_rows
def execute(self):
    if self._executed and self.warn_multiple_exec:
        msg = "Batch executed multiple times."
        if self._context_entered:
            msg += " If using the batch as a context manager, there is no need to call execute directly."
        warn(msg)
    self._executed = True

    if len(self.queries) == 0:
        # Empty batch is a no-op
        # except for callbacks
        self._execute_callbacks()
        return

    opener = 'BEGIN ' + (self.batch_type + ' ' if self.batch_type else '') + ' BATCH'
    if self.timestamp:
        if isinstance(self.timestamp, six.integer_types):
            ts = self.timestamp
        elif isinstance(self.timestamp, (datetime, timedelta)):
            ts = self.timestamp
            if isinstance(self.timestamp, timedelta):
                ts += datetime.now()  # Apply timedelta
            ts = int(time.mktime(ts.timetuple()) * 1e+6 + ts.microsecond)
        else:
            raise ValueError("Batch expects a long, a timedelta, or a datetime")

        opener += ' USING TIMESTAMP {0}'.format(ts)

    query_list = [opener]
    parameters = {}
    ctx_counter = 0
    for query in self.queries:
        query.update_context_id(ctx_counter)
        ctx = query.get_context()
        ctx_counter += len(ctx)
        query_list.append('  ' + str(query))
        parameters.update(ctx)

    query_list.append('APPLY BATCH;')

    tmp = connection.execute('\n'.join(query_list), parameters, self._consistency, self._timeout)
    check_applied(tmp)

    self.queries = []
    self._execute_callbacks()
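# For comparison, the usual way to drive this batch through cqlengine's public
# API: queries buffered on the BatchQuery are flushed as one CQL batch when the
# context exits (the model below is a hypothetical example):
from cassandra.cqlengine import columns
from cassandra.cqlengine.models import Model
from cassandra.cqlengine.query import BatchQuery

class ExampleEvent(Model):
    k = columns.Integer(primary_key=True)
    v = columns.Text()

with BatchQuery() as b:
    ExampleEvent.batch(b).create(k=1, v='first')
    ExampleEvent.batch(b).create(k=2, v='second')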
def get_fields(model):
    # returns all fields that aren't part of the PK
    ks_name = model._get_keyspace()
    col_family = model.column_family_name(include_keyspace=False)
    field_types = ['regular', 'static']
    query = "select * from system.schema_columns where keyspace_name = %s and columnfamily_name = %s"
    tmp = execute(query, [ks_name, col_family])

    # Tables containing only primary keys do not appear to create
    # any entries in system.schema_columns, as only non-primary-key attributes
    # appear to be inserted into the schema_columns table
    try:
        return [
            Field(x['column_name'], x['validator'])
            for x in tmp if x['type'] in field_types
        ]
    except KeyError:
        return [Field(x['column_name'], x['validator']) for x in tmp]
def execute(self):
    if len(self.queries) == 0:
        # Empty batch is a no-op
        # except for callbacks
        self._execute_callbacks()
        return

    opener = "BEGIN " + (self.batch_type + " " if self.batch_type else "") + " BATCH"
    if self.timestamp:
        if isinstance(self.timestamp, six.integer_types):
            ts = self.timestamp
        elif isinstance(self.timestamp, (datetime, timedelta)):
            ts = self.timestamp
            if isinstance(self.timestamp, timedelta):
                ts += datetime.now()  # Apply timedelta
            ts = int(time.mktime(ts.timetuple()) * 1e6 + ts.microsecond)
        else:
            raise ValueError("Batch expects a long, a timedelta, or a datetime")

        opener += " USING TIMESTAMP {0}".format(ts)

    query_list = [opener]
    parameters = {}
    ctx_counter = 0
    for query in self.queries:
        query.update_context_id(ctx_counter)
        ctx = query.get_context()
        ctx_counter += len(ctx)
        query_list.append("  " + str(query))
        parameters.update(ctx)

    query_list.append("APPLY BATCH;")

    tmp = connection.execute("\n".join(query_list), parameters, self._consistency, self._timeout)
    check_applied(tmp)

    self.queries = []
    self._execute_callbacks()
def test_like_operator(self):
    """
    Test to verify the like operator works appropriately

    @since 3.13
    @jira_ticket PYTHON-512
    @expected_result the expected row is read using LIKE

    @test_category data_types:object_mapper
    """
    cluster = Cluster()
    session = cluster.connect()
    self.addCleanup(cluster.shutdown)

    session.execute("""CREATE CUSTOM INDEX text_index ON {} (text)
                       USING 'org.apache.cassandra.index.sasi.SASIIndex';""".format(self.table_name))
    self.addCleanup(session.execute, "DROP INDEX {}.text_index".format(DEFAULT_KEYSPACE))

    partition = uuid4()
    cluster = 1
    self._insert_statement(partition, cluster)

    ss = SelectStatement(self.table_name)
    like_clause = "text_for_%"
    ss.add_where(Column(db_field='text'), LikeOperator(), like_clause)
    self.assertEqual(six.text_type(ss),
                     'SELECT * FROM {} WHERE "text" LIKE %(0)s'.format(self.table_name))

    result = execute(ss)
    self.assertEqual(result[0]["text"], self.text)

    q = TestQueryUpdateModel.objects.filter(text__like=like_clause).allow_filtering()
    self.assertEqual(q[0].text, self.text)

    q = TestQueryUpdateModel.objects.filter(text__like=like_clause)
    self.assertEqual(q[0].text, self.text)
def sync_table(model):
    """
    Inspects the model and creates / updates the corresponding table and columns.

    Any User Defined Types used in the table are implicitly synchronized.

    This function can only add fields that are not part of the primary key.

    Note that the attributes removed from the model are not deleted on the database.
    They become effectively ignored by (will not show up on) the model.

    **This function should be used with caution, especially in production environments.
    Take care to execute schema modifications in a single context (i.e. not concurrently with other clients).**

    *There are plans to guard schema-modifying functions with an environment-driven conditional.*
    """
    if not _allow_schema_modification():
        return

    if not issubclass(model, Model):
        raise CQLEngineException("Models must be derived from base Model.")

    if model.__abstract__:
        raise CQLEngineException("cannot create table from abstract model")

    cf_name = model.column_family_name()
    raw_cf_name = model._raw_column_family_name()

    ks_name = model._get_keyspace()

    cluster = get_cluster()

    keyspace = cluster.metadata.keyspaces[ks_name]
    tables = keyspace.tables

    syncd_types = set()
    for col in model._columns.values():
        udts = []
        columns.resolve_udts(col, udts)
        for udt in [u for u in udts if u not in syncd_types]:
            _sync_type(ks_name, udt, syncd_types)

    # check for an existing column family
    if raw_cf_name not in tables:
        log.debug("sync_table creating new table %s", cf_name)
        qs = get_create_table(model)

        try:
            execute(qs)
        except CQLEngineException as ex:
            # 1.2 doesn't return cf names, so we have to examine the exception
            # and ignore if it says the column family already exists
            if "Cannot add already existing column family" not in unicode(ex):
                raise
    else:
        log.debug("sync_table checking existing table %s", cf_name)
        # see if we're missing any columns
        fields = get_fields(model)
        field_names = [x.name for x in fields]
        model_fields = set()
        # # TODO: does this work with db_name??
        for name, col in model._columns.items():
            if col.primary_key or col.partition_key:
                continue  # we can't mess with the PK
            model_fields.add(name)
            if col.db_field_name in field_names:
                continue  # skip columns already defined

            # add missing column using the column def
            query = "ALTER TABLE {} add {}".format(cf_name, col.get_column_def())
            execute(query)

        db_fields_not_in_model = model_fields.symmetric_difference(field_names)
        if db_fields_not_in_model:
            log.info("Table %s has fields not referenced by model: %s",
                     cf_name, db_fields_not_in_model)

        update_compaction(model)

    table = cluster.metadata.keyspaces[ks_name].tables[raw_cf_name]

    indexes = [c for n, c in model._columns.items() if c.index]

    for column in indexes:
        if table.columns[column.db_field_name].index:
            continue

        qs = ['CREATE INDEX index_{}_{}'.format(raw_cf_name, column.db_field_name)]
        qs += ['ON {}'.format(cf_name)]
        qs += ['("{}")'.format(column.db_field_name)]
        qs = ' '.join(qs)
        execute(qs)
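# A typical call sequence around sync_table (hosts, keyspace and model are
# illustrative; the keyspace is assumed to exist already):
from cassandra.cqlengine import columns, connection, management
from cassandra.cqlengine.models import Model

class ExampleUser(Model):
    user_id = columns.UUID(primary_key=True)
    name = columns.Text()

connection.setup(['127.0.0.1'], 'demo_ks')
management.sync_table(ExampleUser)  # creates the table or adds missing non-PK columns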
def test_keyspace(verbose=False):
    connection.execute("USE {};".format(settings.CASSANDRA_CONNECTION['KEYSPACE']))
    if verbose:
        print("--- Keyspace OK")