def _define_types(ks_name, raw_cf_name):
    # Define a TextField type to analyze texts (tokenizer, ascii, etc.)
    try:
        execute("ALTER SEARCH INDEX SCHEMA ON {0}.{1}"
                " ADD types.fieldType[@name='TextField',"
                " @class='org.apache.solr.schema.TextField']"
                " WITH {2};".format(ks_name, raw_cf_name,
                                    TEXT_SEARCH_JSON_SNIPPED))
    except Exception as ex:
        _logger.warning("Maybe the field type has already been"
                        " defined in the schema. Cause: {}".format(ex))

    # Define Point and LineString types for geospatial queries:
    # <fieldType name="LocationField"
    #            class="solr.SpatialRecursivePrefixTreeFieldType"
    #            geo="false"
    #            worldBounds="ENVELOPE(-1000, 1000, 1000, -1000)"
    #            maxDistErr="0.001"
    #            units="degrees" />
    try:
        execute("ALTER SEARCH INDEX SCHEMA ON {0}.{1}"
                " ADD types.fieldType[@name='LocationField',"
                " @class='solr.SpatialRecursivePrefixTreeFieldType',"
                " @geo='false',"
                " @worldBounds='ENVELOPE(-1000, 1000, 1000, -1000)',"
                " @maxDistErr='0.001',"
                " @units='degrees'];".format(ks_name, raw_cf_name))
    except Exception as ex:
        _logger.warning("Maybe the field type has already been"
                        " defined in the schema. Cause: {}".format(ex))

def _process_field(ks_name, table_name, field_name, field_type="StrField",
                   indexed=True, stored=True, multivalued=False,
                   docvalues=False, dynamic=False, add=True):
    try:
        if add:
            execute(
                "ALTER SEARCH INDEX SCHEMA ON {0}.{1}"
                " ADD fields.{2}[@name='{3}', @type='{4}',"
                " @indexed='{5}', @stored='{6}',"
                " @multiValued='{7}', @docValues='{8}'];".format(
                    ks_name, table_name,
                    "field" if not dynamic else "dynamicField",
                    field_name if not dynamic else "*_{}".format(field_name),
                    field_type,
                    "true" if indexed else "false",
                    "true" if stored else "false",
                    "true" if multivalued else "false",
                    "true" if docvalues else "false"))
        else:
            execute("ALTER SEARCH INDEX SCHEMA ON {0}.{1}"
                    " DROP {2}".format(ks_name, table_name, field_name))
    except Exception as ex:
        if add:
            _logger.warning("Maybe the field has already been"
                            " defined in the schema. Cause: {}".format(ex))
        else:
            _logger.warning("Maybe the field is not defined"
                            " in the schema. Cause: {}".format(ex))

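# A minimal usage sketch, not part of the original module: the keyspace,
# table and field names below are hypothetical, and it simply exercises the
# _process_field helper defined above under the assumption that a search
# index already exists on the table.
def _example_process_fields(ks_name="my_ks", table_name="my_table"):
    # Plain stored + indexed StrField for a regular column
    _process_field(ks_name, table_name, "title")
    # Dynamic "*_coordinate" field, indexed but not stored
    _process_field(ks_name, table_name, "coordinate", field_type="tdouble",
                   stored=False, dynamic=True)
    # Drop a field that is no longer needed
    _process_field(ks_name, table_name, "title", add=False)
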
def test_extra_field(self):
    drop_table(self.TestModel)
    sync_table(self.TestModel)
    self.TestModel.create()
    execute("ALTER TABLE {0} add blah int".format(
        self.TestModel.column_family_name(include_keyspace=True)))
    self.TestModel.objects().all()

def __create_keyspace(name, durable_writes, strategy_class,
                      strategy_options, connection=None):
    cluster = get_cluster(connection)

    if name not in cluster.metadata.keyspaces:
        log.info(format_log_context("Creating keyspace %s",
                                    connection=connection), name)
        ks_meta = metadata.KeyspaceMetadata(name, durable_writes,
                                            strategy_class, strategy_options)
        execute(ks_meta.as_cql_query(), connection=connection)
    else:
        log.info(format_log_context(
            "Not creating keyspace %s because it already exists",
            connection=connection), name)

def _update_options(model, connection=None):
    """Updates the table options for the given model if necessary.

    :param model: The model to update.
    :param connection: Name of the connection to use
    :return: `True`, if the options were modified in Cassandra,
        `False` otherwise.
    :rtype: bool
    """
    ks_name = model._get_keyspace()
    msg = format_log_context("Checking %s for option differences",
                             keyspace=ks_name, connection=connection)
    log.debug(msg, model)
    model_options = model.__options__ or {}

    table_meta = _get_table_metadata(model, connection=connection)
    # go to CQL string first to normalize meta from different versions
    existing_option_strings = set(
        table_meta._make_option_strings(table_meta.options))
    existing_options = _options_map_from_strings(existing_option_strings)
    model_option_strings = metadata.TableMetadataV3._make_option_strings(
        model_options)
    model_options = _options_map_from_strings(model_option_strings)

    update_options = {}
    for name, value in model_options.items():
        try:
            existing_value = existing_options[name]
        except KeyError:
            msg = format_log_context(
                "Invalid table option: '%s'; known options: %s",
                keyspace=ks_name, connection=connection)
            raise KeyError(msg % (name, existing_options.keys()))
        if isinstance(existing_value, six.string_types):
            if value != existing_value:
                update_options[name] = value
        else:
            try:
                for k, v in value.items():
                    if existing_value[k] != v:
                        update_options[name] = value
                        break
            except KeyError:
                update_options[name] = value

    if update_options:
        options = ' AND '.join(
            metadata.TableMetadataV3._make_option_strings(update_options))
        query = "ALTER TABLE {0} WITH {1}".format(
            model.column_family_name(), options)
        execute(query, connection=connection)
        return True

    return False

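# A hedged sketch (not part of the original source) of the kind of model
# _update_options compares against table metadata: the model name, columns
# and option values below are made up, while __options__ is the cqlengine
# hook that carries the table-level options being diffed above.
import uuid

from cassandra.cqlengine import columns
from cassandra.cqlengine.models import Model


class ExampleOptionsModel(Model):
    __table_name__ = 'example_options'
    __options__ = {
        'default_time_to_live': 600,
        'gc_grace_seconds': 86400,
    }

    id = columns.UUID(primary_key=True, default=uuid.uuid4)
    value = columns.Text()
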
def _process_copy_field(ks_name, table_name, src, dest, add=True):
    try:
        execute(
            "ALTER SEARCH INDEX SCHEMA ON {0}.{1} {2}"
            " copyField[@source='{3}', @dest='{4}'];".format(
                ks_name, table_name, "ADD" if add else "DROP", src, dest)
        )
    except Exception as ex:
        if add:
            _logger.warning("Maybe the copy field has already been"
                            " defined in the schema. Cause: {}".format(ex))
        else:
            _logger.warning("Maybe the copy field is not available"
                            " in the schema. Cause: {}".format(ex))

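# A hedged sketch, not in the original source, mirroring the faceting
# pattern used by _create_index below: a docValues-enabled "<field>_exact"
# field is created and fed from the original field via a copyField rule.
# Keyspace, table and field names are hypothetical.
def _example_facet_copy_field(ks_name="my_ks", table_name="my_table",
                              field_name="category"):
    # Destination field with docValues=true, used for facet queries
    _process_field(ks_name, table_name, "{}_exact".format(field_name),
                   stored=False, docvalues=True)
    # Copy the source field into the _exact destination
    _process_copy_field(ks_name, table_name, field_name,
                        "{}_exact".format(field_name))
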
def _sync_type(ks_name, type_model, omit_subtypes=None, connection=None):

    syncd_sub_types = omit_subtypes or set()
    for field in type_model._fields.values():
        udts = []
        columns.resolve_udts(field, udts)
        for udt in [u for u in udts if u not in syncd_sub_types]:
            _sync_type(ks_name, udt, syncd_sub_types, connection=connection)
            syncd_sub_types.add(udt)

    type_name = type_model.type_name()
    type_name_qualified = "%s.%s" % (ks_name, type_name)

    cluster = get_cluster(connection)
    keyspace = cluster.metadata.keyspaces[ks_name]
    defined_types = keyspace.user_types

    if type_name not in defined_types:
        log.debug(format_log_context("sync_type creating new type %s",
                                     keyspace=ks_name, connection=connection),
                  type_name_qualified)
        cql = get_create_type(type_model, ks_name)
        execute(cql, connection=connection)
        cluster.refresh_user_type_metadata(ks_name, type_name)
        type_model.register_for_keyspace(ks_name, connection=connection)
    else:
        type_meta = defined_types[type_name]
        defined_fields = type_meta.field_names
        model_fields = set()
        for field in type_model._fields.values():
            model_fields.add(field.db_field_name)
            if field.db_field_name not in defined_fields:
                execute("ALTER TYPE {0} ADD {1}".format(
                    type_name_qualified, field.get_column_def()),
                    connection=connection)
            else:
                field_type = type_meta.field_types[
                    defined_fields.index(field.db_field_name)]
                if field_type != field.db_type:
                    msg = format_log_context(
                        'Existing user type {0} has field "{1}" with a type'
                        ' ({2}) differing from the model user type ({3}).'
                        ' UserType should be updated.',
                        keyspace=ks_name, connection=connection)
                    msg = msg.format(type_name_qualified, field.db_field_name,
                                     field_type, field.db_type)
                    warnings.warn(msg)
                    log.warning(msg)

        type_model.register_for_keyspace(ks_name, connection=connection)

        if len(defined_fields) == len(model_fields):
            log.info(format_log_context(
                "Type %s did not require synchronization",
                keyspace=ks_name, connection=connection), type_name_qualified)
            return

        db_fields_not_in_model = model_fields.symmetric_difference(
            defined_fields)
        if db_fields_not_in_model:
            msg = format_log_context(
                "Type %s has fields not referenced by model: %s",
                keyspace=ks_name, connection=connection)
            log.info(msg, type_name_qualified, db_fields_not_in_model)

def _drop_table(model, connection=None):
    if not _allow_schema_modification():
        return

    connection = connection or model._get_connection()

    # don't try to delete non-existent tables
    meta = get_cluster(connection).metadata

    ks_name = model._get_keyspace()
    raw_cf_name = model._raw_column_family_name()

    try:
        meta.keyspaces[ks_name].tables[raw_cf_name]
        execute('DROP TABLE {0};'.format(model.column_family_name()),
                connection=connection)
    except KeyError:
        pass

def _verify_statement(self, original):
    st = SelectStatement(self.table_name)
    result = execute(st)
    response = result[0]

    for assignment in original.assignments:
        self.assertEqual(response[assignment.field], assignment.value)
    self.assertEqual(len(response), 7)

def _define_location(ks_name, raw_cf_name, field_name):
    # Define Point and LineString types for geospatial queries:
    # <fieldType name="location" class="solr.LatLonType"
    #            subFieldSuffix="_coordinate"/>
    try:
        execute(
            "ALTER SEARCH INDEX SCHEMA ON {0}.{1}"
            " ADD types.fieldType[@name='location',"
            " @class='solr.LatLonType',"
            " @subFieldSuffix='_coordinate'];".format(ks_name, raw_cf_name)
        )
        _process_field(ks_name, raw_cf_name, "coordinate", "tdouble",
                       stored=False, dynamic=True)
    except Exception as ex:
        _logger.warning("Maybe the field type has already been"
                        " defined in the schema. Cause: {}".format(ex))

def test_insert_statement_execute(self):
    """
    Test to verify the execution of BaseCQLStatements using
    connection.execute

    @since 3.10
    @jira_ticket PYTHON-505
    @expected_result inserts a row in C*, updates the row and then
        deletes all the rows using BaseCQLStatements

    @test_category data_types:object_mapper
    """
    partition = uuid4()
    cluster = 1

    # Verifying insert statement
    st = InsertStatement(self.table_name)
    st.add_assignment(Column(db_field='partition'), partition)
    st.add_assignment(Column(db_field='cluster'), cluster)

    st.add_assignment(Column(db_field='count'), 1)
    st.add_assignment(Column(db_field='text'), "text_for_db")
    st.add_assignment(Column(db_field='text_set'), set(("foo", "bar")))
    st.add_assignment(Column(db_field='text_list'), ["foo", "bar"])
    st.add_assignment(Column(db_field='text_map'), {"foo": '1', "bar": '2'})

    execute(st)
    self._verify_statement(st)

    # Verifying update statement
    where = [
        WhereClause('partition', EqualsOperator(), partition),
        WhereClause('cluster', EqualsOperator(), cluster)
    ]

    st = UpdateStatement(self.table_name, where=where)
    st.add_assignment(Column(db_field='count'), 2)
    st.add_assignment(Column(db_field='text'), "text_for_db_update")
    st.add_assignment(Column(db_field='text_set'),
                      set(("foo_update", "bar_update")))
    st.add_assignment(Column(db_field='text_list'),
                      ["foo_update", "bar_update"])
    st.add_assignment(Column(db_field='text_map'), {"foo": '3', "bar": '4'})

    execute(st)
    self._verify_statement(st)

    # Verifying delete statement
    execute(DeleteStatement(self.table_name, where=where))
    self.assertEqual(TestQueryUpdateModel.objects.count(), 0)

def _create_index(model, index, connection=None):
    if not management._allow_schema_modification():
        return

    connection = connection or model._get_connection()

    # don't try to create indexes in non-existent tables
    meta = management.get_cluster(connection).metadata

    ks_name = model._get_keyspace()
    raw_cf_name = model._raw_column_family_name()

    try:
        _logger.info(
            "Creating SEARCH INDEX if not exists for model: {}".format(model))
        meta.keyspaces[ks_name].tables[raw_cf_name]

        execute("CREATE SEARCH INDEX IF NOT EXISTS ON {0}.{1};".format(
            ks_name, raw_cf_name), timeout=30.0)

        if hasattr(index.Meta, "index_settings"):
            for param, value in index.Meta.index_settings.items():
                _logger.info("Setting index parameters: {0} = {1}".format(
                    param, value))
                execute("ALTER SEARCH INDEX CONFIG ON {0}.{1}"
                        " SET {2} = {3};".format(ks_name, raw_cf_name,
                                                 param, value))

        _define_types(ks_name, raw_cf_name)

        search_fields = [
            attr for attr in index.__class__.__dict__.values()
            if issubclass(attr.__class__, fields.SearchField)
        ]

        document_fields = []
        for search_field in search_fields:
            # If the field does not have a direct mapping with the model
            if not search_field.model_attr:
                continue

            _logger.info("Processing field {0}({1})".format(
                search_field.__class__, search_field.model_attr))

            # force the creation of the field if it does not exist yet (the
            # original table may have been changed after the index was created)
            try:
                _process_field(ks_name, raw_cf_name,
                               search_field.model_attr,
                               field_type=_get_solr_type(
                                   model, index, search_field),
                               multivalued=search_field.is_multivalued,
                               stored=True, indexed=True, docvalues=False)
            except Exception as ex:
                _logger.warning("Maybe the field has already been"
                                " created in the schema. Cause: {}".format(ex))

            # https://docs.datastax.com/en/
            # datastax_enterprise/5.0/
            # datastax_enterprise/srch/queriesGeoSpatial.html
            # <fieldType name="location" class="solr.LatLonType"
            #            subFieldSuffix="_coordinate"/>
            if issubclass(search_field.__class__, fields.LocationField):
                execute("ALTER SEARCH INDEX SCHEMA ON {0}.{1}"
                        " SET fields.field[@name='{2}']@type='LocationField';".
                        format(ks_name, raw_cf_name, search_field.model_attr))
                continue

            # Facet fields
            if search_field.faceted:
                # We need to create a <field>_exact field in Solr with
                # docValues=true that will receive the data from the
                # original <field> (copyField).
                # This <field>_exact field is the one used by Haystack to
                # do the facet queries.
                _process_field(ks_name, raw_cf_name,
                               "{}_exact".format(search_field.model_attr),
                               field_type=_get_solr_type(
                                   model, index, search_field),
                               multivalued=search_field.is_multivalued,
                               stored=False, docvalues=True)
                _process_copy_field(ks_name, raw_cf_name,
                                    search_field.model_attr,
                                    "{}_exact".format(search_field.model_attr))

            # Get a reference to the model column definition
            attribute = getattr(model, search_field.model_attr, None)

            # Indexed field?
            if not (attribute and isinstance(attribute.column, columns.Map)):
                if search_field.indexed:
                    execute("ALTER SEARCH INDEX SCHEMA ON {0}.{1}"
                            " SET fields.field[@name='{2}']@indexed='true';".
                            format(ks_name, raw_cf_name,
                                   search_field.model_attr))
                else:
                    execute("ALTER SEARCH INDEX SCHEMA ON {0}.{1}"
                            " SET fields.field[@name='{2}']@indexed='false';".
                            format(ks_name, raw_cf_name,
                                   search_field.model_attr))

            # Multi-valued field? force multiValued accordingly
            if not (attribute and isinstance(attribute.column, columns.Map)):
                if search_field.is_multivalued:
                    execute(
                        "ALTER SEARCH INDEX SCHEMA ON {0}.{1}"
                        " SET fields.field[@name='{2}']@multiValued='true';".
                        format(ks_name, raw_cf_name, search_field.model_attr))
                else:
                    execute(
                        "ALTER SEARCH INDEX SCHEMA ON {0}.{1}"
                        " SET fields.field[@name='{2}']@multiValued='false';".
                        format(ks_name, raw_cf_name, search_field.model_attr))

            # All the document fields have to be TextFields to be
            # processed as tokens
            if not (attribute and isinstance(attribute.column, columns.Map)):
                if issubclass(search_field.__class__, fields.CharField):
                    if search_field.model_attr in index.Meta.text_fields:
                        _logger.info(
                            "Changing SEARCH INDEX field {0} to TextField".
                            format(search_field.model_attr))
                        execute(
                            "ALTER SEARCH INDEX SCHEMA ON {0}.{1} "
                            " SET fields.field[@name='{2}']@type='TextField';".
                            format(ks_name, raw_cf_name,
                                   search_field.model_attr))
                        document_fields.append(search_field)
                    # else:
                    #     execute(
                    #         "ALTER SEARCH INDEX SCHEMA ON {0}.{1} "
                    #         " SET fields.field[@name='{2}']@type='StrField';".
                    #         format(ks_name, raw_cf_name,
                    #                search_field.model_attr))

        # If there are document fields we need to copy all of them into
        # the text field
        if len(document_fields):
            _process_field(ks_name, raw_cf_name, "text",
                           field_type="TextField",
                           stored=False, multivalued=True)
            for document_field in document_fields:
                _process_copy_field(ks_name, raw_cf_name,
                                    document_field.model_attr, "text")

        # Reload the index for the changes to take effect
        execute("RELOAD SEARCH INDEX ON {0}.{1};".format(ks_name,
                                                         raw_cf_name))
    except KeyError:
        _logger.exception("Unable to create the search index")

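# A hedged illustration (not from the original source) of the attributes
# _create_index reads from a search index class: SearchField instances on
# the class carrying a model_attr, an optional faceted flag, and a Meta
# holding index_settings / text_fields. The index name, field names and
# option values below are hypothetical, and the constructor kwargs assume
# the Haystack-style fields module used above.
class ExampleModelIndex(object):
    name = fields.CharField(model_attr="name", faceted=True)
    description = fields.CharField(model_attr="description")
    location = fields.LocationField(model_attr="location")

    class Meta:
        # Passed verbatim to ALTER SEARCH INDEX CONFIG ... SET <param> = <value>
        index_settings = {"autoCommitTime": 10000}
        # model_attr names re-typed as TextField and copied into the
        # catch-all "text" field
        text_fields = ["description"]
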
def _sync_table(model, connection=None):
    if not _allow_schema_modification():
        return

    if not issubclass(model, Model):
        raise CQLEngineException("Models must be derived from base Model.")

    if model.__abstract__:
        raise CQLEngineException("cannot create table from abstract model")

    cf_name = model.column_family_name()
    raw_cf_name = model._raw_column_family_name()

    ks_name = model._get_keyspace()
    connection = connection or model._get_connection()
    cluster = get_cluster(connection)

    try:
        keyspace = cluster.metadata.keyspaces[ks_name]
    except KeyError:
        msg = format_log_context(
            "Keyspace '{0}' for model {1} does not exist.",
            connection=connection)
        raise CQLEngineException(msg.format(ks_name, model))

    tables = keyspace.tables

    syncd_types = set()
    for col in model._columns.values():
        udts = []
        columns.resolve_udts(col, udts)
        for udt in [u for u in udts if u not in syncd_types]:
            _sync_type(ks_name, udt, syncd_types, connection=connection)

    if raw_cf_name not in tables:
        log.debug(format_log_context("sync_table creating new table %s",
                                     keyspace=ks_name, connection=connection),
                  cf_name)
        qs = _get_create_table(model)

        try:
            execute(qs, connection=connection)
        except CQLEngineException as ex:
            # 1.2 doesn't return cf names, so we have to examine the exception
            # and ignore if it says the column family already exists
            if "Cannot add already existing column family" not in six.text_type(ex):
                raise
    else:
        log.debug(format_log_context("sync_table checking existing table %s",
                                     keyspace=ks_name, connection=connection),
                  cf_name)
        table_meta = tables[raw_cf_name]

        _validate_pk(model, table_meta)

        table_columns = table_meta.columns
        model_fields = set()

        for model_name, col in model._columns.items():
            db_name = col.db_field_name
            model_fields.add(db_name)
            if db_name in table_columns:
                col_meta = table_columns[db_name]
                if col_meta.cql_type != col.db_type:
                    msg = format_log_context(
                        'Existing table {0} has column "{1}" with a type ({2})'
                        ' differing from the model type ({3}).'
                        ' Model should be updated.',
                        keyspace=ks_name, connection=connection)
                    msg = msg.format(cf_name, db_name, col_meta.cql_type,
                                     col.db_type)
                    warnings.warn(msg)
                    log.warning(msg)

                continue

            if col.primary_key or col.partition_key:
                msg = format_log_context(
                    "Cannot add primary key '{0}' (with db_field '{1}') to"
                    " existing table {2}",
                    keyspace=ks_name, connection=connection)
                raise CQLEngineException(msg.format(model_name, db_name,
                                                    cf_name))

            query = "ALTER TABLE {0} add {1}".format(cf_name,
                                                     col.get_column_def())
            execute(query, connection=connection)

        db_fields_not_in_model = model_fields.symmetric_difference(
            table_columns)
        if db_fields_not_in_model:
            msg = format_log_context(
                "Table {0} has fields not referenced by model: {1}",
                keyspace=ks_name, connection=connection)
            log.info(msg.format(cf_name, db_fields_not_in_model))

        _update_options(model, connection=connection)

    table = cluster.metadata.keyspaces[ks_name].tables[raw_cf_name]

    indexes = [c for n, c in model._columns.items() if c.index]

    # TODO: support multiple indexes in C* 3.0+
    for column in indexes:
        index_name = _get_index_name_by_column(table, column.db_field_name)
        if index_name:
            continue

        qs = ['CREATE INDEX']
        qs += ['ON {0}'.format(cf_name)]
        qs += ['("{0}")'.format(column.db_field_name)]
        qs = ' '.join(qs)
        execute(qs, connection=connection)

def _drop_keyspace(name, connection=None):
    cluster = get_cluster(connection)
    if name in cluster.metadata.keyspaces:
        execute("DROP KEYSPACE {0}".format(metadata.protect_name(name)),
                connection=connection)

def _define_types(ks_name, raw_cf_name):
    # Define a TextField type to analyze texts (tokenizer, ascii, etc.)
    try:
        execute(
            f"ALTER SEARCH INDEX SCHEMA ON {ks_name}.{raw_cf_name}"
            f" ADD types.fieldType[@name='TextField',"
            f" @class='org.apache.solr.schema.TextField']"
            f" WITH {TEXT_SEARCH_JSON_SNIPPED};"
        )
    except Exception as ex:
        _logger.warning("Maybe the field type TextField has already been"
                        " defined in the schema. Cause: {}".format(ex))

    try:
        execute(
            f"ALTER SEARCH INDEX SCHEMA ON {ks_name}.{raw_cf_name}"
            f" ADD types.fieldType[@name='ISCStrField',"
            f" @class='org.apache.solr.schema.TextField']"
            f" WITH {STR_SEARCH_JSON_SNIPPED};"
        )
    except Exception as ex:
        _logger.warning(
            "Maybe the field type ISCStrField has already been"
            " defined in the schema. Cause: {}".format(ex)
        )

    # Define the DSE-specific tuple and date field types
    types = ["TupleField", "SimpleDateField"]
    for type_name in types:
        try:
            execute(
                f"ALTER SEARCH INDEX SCHEMA ON {ks_name}.{raw_cf_name}"
                f" ADD types.fieldType[@name='{type_name}',"
                f" @class='com.datastax.bdp.search.solr.core.types.{type_name}'];"
            )
        except Exception as ex:
            _logger.warning(f"Maybe the field type {type_name} has already been"
                            f" defined in the schema. Cause: {ex}")

    # Define the standard Solr Trie*/Bool/UUID field types
    types = ["TrieLongField", "TrieDoubleField", "TrieIntField",
             "BoolField", "UUIDField", "TrieDateField"]
    for type_name in types:
        try:
            execute(
                f"ALTER SEARCH INDEX SCHEMA ON {ks_name}.{raw_cf_name}"
                f" ADD types.fieldType[@name='{type_name}',"
                f" @class='org.apache.solr.schema.{type_name}'];"
            )
        except Exception as ex:
            _logger.warning(f"Maybe the field type {type_name} has already been"
                            f" defined in the schema. Cause: {ex}")

    # Define Point and LineString types for geospatial queries:
    # <fieldType name="LocationField"
    #            class="solr.SpatialRecursivePrefixTreeFieldType"
    #            geo="false"
    #            worldBounds="ENVELOPE(-1000, 1000, 1000, -1000)"
    #            maxDistErr="0.001"
    #            units="degrees" />
    try:
        execute(
            "ALTER SEARCH INDEX SCHEMA ON {0}.{1}"
            " ADD types.fieldType[@name='LocationField',"
            " @class='solr.SpatialRecursivePrefixTreeFieldType',"
            " @geo='false',"
            " @worldBounds='ENVELOPE(-1000, 1000, 1000, -1000)',"
            " @maxDistErr='0.001',"
            " @units='degrees'];".format(ks_name, raw_cf_name)
        )
    except Exception as ex:
        _logger.warning("Maybe the field type LocationField has already been"
                        " defined in the schema. Cause: {}".format(ex))

def _drop_unnecessary_indexes(ks_name, table_name, fieldsname):
    for fieldname in fieldsname:
        try:
            execute(f"ALTER SEARCH INDEX SCHEMA ON {ks_name}.{table_name}"
                    f" DROP field {fieldname}")
        except Exception as ex:
            _logger.warning(f"Unable to remove unnecessary field"
                            f" {fieldname}. Cause: {ex}")

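# A minimal, hypothetical usage sketch (not in the original source):
# dropping schema fields that are no longer declared on the search index.
# The keyspace, table and field names are made up.
def _example_drop_stale_fields(ks_name="my_ks", table_name="my_table"):
    _drop_unnecessary_indexes(ks_name, table_name,
                              ["legacy_field", "legacy_field_exact"])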