def set_synonym_references_for_old_mapped_values(session: Session) -> None:
    """Link NLU training-data entities to the synonyms they were mapped from.

    For every row in ``entity_synonym_value``, every entity whose
    ``original_value`` equals that row's name gets its ``entity_synonym_id``
    column set to the row's synonym id.
    """
    values_table = migration_utils.get_reflected_table(
        "entity_synonym_value", session
    )
    mapped_values = session.execute(values_table.select()).fetchall()
    entity_table = migration_utils.get_reflected_table(
        "nlu_training_data_entity", session
    )

    for mapped_value in mapped_values:
        session.execute(
            sa.update(entity_table)
            .where(entity_table.c.original_value == mapped_value.name)
            .values(entity_synonym_id=mapped_value.entity_synonym_id)
        )

    session.commit()
def upgrade():
    """Add the slot columns to the events table, backfill them from the JSON
    event payloads, and index ``slot_name``.

    Events whose type is not ``slot``, and slot events for the framework's
    requested-slot, are skipped.
    """
    for column_name, column_info in COLUMNS.items():
        migration_utils.create_column(
            TABLE_NAME, sa.Column(column_name, column_info["type"])
        )

    session = sa.orm.Session(bind=op.get_bind())
    events_table = migration_utils.get_reflected_table(TABLE_NAME, session)

    for event_row in session.query(events_table).yield_per(BULK_SIZE):
        if event_row.type_name != "slot":
            continue

        payload = json.loads(event_row.data)
        slot_name = payload.get("name")
        if slot_name == REQUESTED_SLOT:
            continue

        session.execute(
            sa.update(events_table)
            .where(events_table.c.id == event_row.id)
            .values(
                slot_name=slot_name,
                # Sort keys so that equivalent values can be discarded using
                # DISTINCT in an SQL query
                slot_value=json.dumps(payload.get("value"), sort_keys=True),
            )
        )

    with op.batch_alter_table(TABLE_NAME) as batch_op:
        # Only create index for `slot_name` column, see migration 3fbc8790762e
        # to see why.
        batch_op.create_index(COLUMNS["slot_name"]["index"], ["slot_name"])

    session.commit()
def downgrade():
    """Roll back to the legacy single-table entity synonym schema.

    Rebuilds the old table shape under a temporary name, copies every mapped
    value back into it (denormalizing the parent synonym's columns), then
    drops the two new tables and renames the temporary one into place.
    """
    session = Session(bind=op.get_bind())

    with op.batch_alter_table("entity_synonym_value") as batch_op:
        batch_op.drop_index(INDEX_NAME)

    # Recreate the pre-migration table under a temporary name.
    op.create_table(
        "entity_synonym_tmp",
        sa.Column("name", sa.String(255), nullable=False),
        sa.Column("synonym", sa.String(255), nullable=False),
        sa.Column("project_id", sa.String(255), nullable=False),
        sa.Column("filename", sa.String(255), nullable=True),
        sa.ForeignKeyConstraint(["project_id"], ["project.project_id"]),
        sa.PrimaryKeyConstraint("name", "synonym", "project_id"),
    )

    values_table = migration_utils.get_reflected_table(
        "entity_synonym_value", session
    )
    for value_row in session.execute(values_table.select()).fetchall():
        session.execute(
            "insert into entity_synonym_tmp "
            "values (:name, :synonym, :project_id, :filename)",
            {
                "name": value_row.name,
                "synonym": value_row.entity_synonym.name,
                "project_id": value_row.entity_synonym.project_id,
                "filename": value_row.entity_synonym.filename,
            },
        )

    op.drop_table("entity_synonym_value")
    op.drop_table("entity_synonym")
    op.rename_table("entity_synonym_tmp", "entity_synonym")

    session.commit()
def extract_information_from_existing_data(): bind = op.get_bind() session = sa.orm.Session(bind=bind) conversation_table = migration_utils.get_reflected_table( CONVERSATION, session) conversation_event_table = migration_utils.get_reflected_table( CONVERSATION_EVENT, session) for conversation in session.query(conversation_table).yield_per(BULK_SIZE): min_intent_confidence = 1.0 max_intent_confidence = 0.0 min_action_confidence = 1.0 max_action_confidence = 0.0 for row in (session.query(conversation_event_table).filter( conversation_event_table.c.conversation_id == conversation.sender_id).yield_per(BULK_SIZE)): data = json.loads(row.data) if row.type_name == "user": confidence = (data.get("parse_data", {}).get("intent", {}).get("confidence")) try: confidence = float(confidence) except (ValueError, TypeError): continue min_intent_confidence = min(min_intent_confidence, confidence) max_intent_confidence = max(max_intent_confidence, confidence) elif row.type_name == "action": confidence = data.get("confidence") try: confidence = float(confidence) except (ValueError, TypeError): continue min_action_confidence = min(min_action_confidence, confidence) max_action_confidence = max(max_action_confidence, confidence) if min_intent_confidence == 1.0: min_intent_confidence = None if max_intent_confidence == 0.0: max_intent_confidence = None if min_action_confidence == 1.0: min_action_confidence = None if max_action_confidence == 0.0: max_action_confidence = None query = (sa.update(conversation_table).where( conversation_table.c.sender_id == conversation.sender_id).values( minimum_intent_confidence=min_intent_confidence, maximum_intent_confidence=max_intent_confidence, minimum_action_confidence=min_action_confidence, maximum_action_confidence=max_action_confidence, )) session.execute(query) session.commit()
def upgrade():
    """Split the legacy entity_synonym table into EntitySynonym (reference
    values) and EntitySynonymValue (the synonyms that map to them)."""
    # New schema:
    #
    # EntitySynonym:
    # Contains the reference values (i.e. what synonyms will be converted into
    # when found in a message by the NLU functions). The string will be stored in
    # column "name". (example: "New York")
    #
    # EntitySynonymValue:
    # Contains synonyms for values in EntitySynonym. The string value will be
    # stored in column "name". (examples: "NYC", "NY")
    session = Session(bind=op.get_bind())

    with warnings.catch_warnings():
        # NOTE(review): SAWarning is suppressed around the primary-key change —
        # presumably reflection/alteration of the PK-less legacy table warns;
        # confirm against the original migration.
        warnings.simplefilter("ignore", category=sqlalchemy.exc.SAWarning)

        # Add a unique ID to EntitySynonym
        with op.batch_alter_table("entity_synonym") as batch_op:
            batch_op.add_column(sa.Column("id", sa.Integer, primary_key=True))
            migration_utils.create_primary_key(
                batch_op, "entity_synonym", ["id"], "entity_synonym_pk"
            )

    # create sequence on this table to be used for the `id` column
    migration_utils.create_sequence("entity_synonym")

    # Create the new table for storing the entity synonym mapped values
    op.create_table(
        "entity_synonym_value",
        sa.Column("id", sa.Integer(), nullable=False),
        sa.Column("entity_synonym_id", sa.Integer(), nullable=True),
        sa.Column("name", sa.String(255), nullable=True),
        sa.ForeignKeyConstraint(
            ["entity_synonym_id"], ["entity_synonym.id"], ondelete="cascade"
        ),
        sa.PrimaryKeyConstraint("id"),
    )
    # create sequence for this new table
    migration_utils.create_sequence("entity_synonym_value")

    # IDs of the rows that survive deduplication (one per distinct synonym).
    min_ids = set()
    synonym_value_table = migration_utils.get_reflected_table(
        "entity_synonym_value", session
    )
    for row in session.execute("select * from entity_synonym"):
        # Each reference value can be repeated various times in the table, so just
        # take the one with the smallest ID and treat that ID as the "real" one.
        min_id = session.scalar(
            "select min(id) from entity_synonym where synonym = :synonym",
            {"synonym": row["synonym"]},
        )
        min_ids.add(min_id)

        # Copy the synonyms previously stored in EntitySynonym over to
        # EntitySynonymValue
        insert_query = synonym_value_table.insert().values(
            entity_synonym_id=min_id, name=row["name"]
        )
        session.execute(insert_query)

    with op.batch_alter_table("entity_synonym") as batch_op:
        # Make column "name" have the reference value (currently, it's in
        # "synonym"). The synonyms themselves have already been added to the
        # EntitySynonymValue table.
        batch_op.drop_column("name")
        batch_op.alter_column(
            "synonym",
            new_column_name="name",
            # The following arguments are required for MySQL
            existing_type=sa.String(),
            existing_server_default=None,
            existing_nullable=True,
        )

    # Now that the synonyms have been copied into EntitySynonymValue, and that the
    # EntitySynonym has been modified correctly, delete all repeated entries from it.
    synonym_table = migration_utils.get_reflected_table(
        "entity_synonym", session
    )
    delete_query = synonym_table.delete().where(
        synonym_table.c.id.notin_(min_ids)
    )
    session.execute(delete_query)

    with op.batch_alter_table("entity_synonym_value") as batch_op:
        batch_op.create_index(INDEX_NAME, ["name"])

    session.commit()