def set_synonym_references_for_old_mapped_values(session: Session) -> None:
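    """Point existing `nlu_training_data_entity` rows at their new parent
    `EntitySynonym` by matching each entity's original value against an
    `EntitySynonymValue` name and copying that value's `entity_synonym_id`."""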
    synonym_value_table = migration_utils.get_reflected_table(
        "entity_synonym_value", session)
    synonym_values = session.execute(synonym_value_table.select()).fetchall()

    entities_table = migration_utils.get_reflected_table(
        "nlu_training_data_entity", session)
    for synonym_value in synonym_values:
        update_query = (
            sa.update(entities_table)
            .where(entities_table.c.original_value == synonym_value.name)
            .values(entity_synonym_id=synonym_value.entity_synonym_id)
        )
        session.execute(update_query)
    session.commit()
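
# The next migration references module-level constants that are not part of
# this excerpt. The definitions below are illustrative assumptions only, not
# taken from the original revision:
#
#     TABLE_NAME = "conversation_event"
#     COLUMNS = {
#         "slot_name": {"type": sa.String(255), "index": "conv_event_slot_name_idx"},
#         "slot_value": {"type": sa.Text()},
#     }
#     BULK_SIZE = 500
#     REQUESTED_SLOT = "requested_slot"  # Rasa's internal form-tracking slot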
def upgrade():
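    """Add `slot_name` and `slot_value` columns to the events table, backfill
    them from the JSON payload of existing `slot` events, and index
    `slot_name`."""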
    for column, info in COLUMNS.items():
        migration_utils.create_column(TABLE_NAME, sa.Column(column, info["type"]))

    bind = op.get_bind()
    session = sa.orm.Session(bind=bind)

    events_table = migration_utils.get_reflected_table(TABLE_NAME, session)

    for row in session.query(events_table).yield_per(BULK_SIZE):
        if row.type_name != "slot":
            continue

        event = json.loads(row.data)
        slot_name = event.get("name")
        slot_value = event.get("value")

        # Skip the framework's internal `requested_slot` bookkeeping slot.
        if slot_name == REQUESTED_SLOT:
            continue

        query = (
            sa.update(events_table)
            .where(events_table.c.id == row.id)
            .values(
                # Sort keys so that equivalent values can be discarded using DISTINCT
                # in an SQL query
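                # (e.g. {"b": 1, "a": 2} and {"a": 2, "b": 1} both serialize
                # to '{"a": 2, "b": 1}')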
                slot_name=slot_name,
                slot_value=json.dumps(slot_value, sort_keys=True),
            )
        )
        session.execute(query)

    with op.batch_alter_table(TABLE_NAME) as batch_op:
        # Only create an index for the `slot_name` column; see migration
        # 3fbc8790762e for the rationale.
        batch_op.create_index(COLUMNS["slot_name"]["index"], ["slot_name"])

    session.commit()
def downgrade():
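    """Collapse `entity_synonym` and `entity_synonym_value` back into the
    single pre-migration `entity_synonym` table."""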
    session = Session(bind=op.get_bind())

    with op.batch_alter_table("entity_synonym_value") as batch_op:
        batch_op.drop_index(INDEX_NAME)

    op.create_table(
        "entity_synonym_tmp",
        sa.Column("name", sa.String(255), nullable=False),
        sa.Column("synonym", sa.String(255), nullable=False),
        sa.Column("project_id", sa.String(255), nullable=False),
        sa.Column("filename", sa.String(255), nullable=True),
        sa.ForeignKeyConstraint(["project_id"], ["project.project_id"]),
        sa.PrimaryKeyConstraint("name", "synonym", "project_id"),
    )

    synonym_value_table = migration_utils.get_reflected_table(
        "entity_synonym_value", session)
    synonym_table = migration_utils.get_reflected_table(
        "entity_synonym", session)

    # The reflected rows are plain tuples without ORM relationships, so join
    # each synonym value with its parent `entity_synonym` row explicitly.
    join_query = sa.select(
        [
            synonym_value_table.c.name,
            synonym_table.c.name.label("synonym"),
            synonym_table.c.project_id,
            synonym_table.c.filename,
        ]
    ).select_from(
        synonym_value_table.join(
            synonym_table,
            synonym_value_table.c.entity_synonym_id == synonym_table.c.id,
        )
    )

    for entity_synonym_value in session.execute(join_query):
        session.execute(
            "insert into entity_synonym_tmp "
            "values (:name, :synonym, :project_id, :filename)",
            {
                "name": entity_synonym_value.name,
                "synonym": entity_synonym_value.synonym,
                "project_id": entity_synonym_value.project_id,
                "filename": entity_synonym_value.filename,
            },
        )

    op.drop_table("entity_synonym_value")
    op.drop_table("entity_synonym")
    op.rename_table("entity_synonym_tmp", "entity_synonym")

    session.commit()
def extract_information_from_existing_data():
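    """Backfill each conversation's minimum/maximum intent and action
    confidence columns from the events already stored in the database."""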
    bind = op.get_bind()
    session = sa.orm.Session(bind=bind)

    conversation_table = migration_utils.get_reflected_table(
        CONVERSATION, session)
    conversation_event_table = migration_utils.get_reflected_table(
        CONVERSATION_EVENT, session)

    for conversation in session.query(conversation_table).yield_per(BULK_SIZE):

        # Confidences lie in [0, 1], so these sentinel bounds are tightened by
        # any real value encountered below.
        min_intent_confidence = 1.0
        max_intent_confidence = 0.0
        min_action_confidence = 1.0
        max_action_confidence = 0.0

        for row in (
            session.query(conversation_event_table)
            .filter(
                conversation_event_table.c.conversation_id == conversation.sender_id
            )
            .yield_per(BULK_SIZE)
        ):
            data = json.loads(row.data)

            if row.type_name == "user":
                confidence = (
                    data.get("parse_data", {}).get("intent", {}).get("confidence")
                )

                try:
                    confidence = float(confidence)
                except (ValueError, TypeError):
                    continue

                min_intent_confidence = min(min_intent_confidence, confidence)
                max_intent_confidence = max(max_intent_confidence, confidence)
            elif row.type_name == "action":
                confidence = data.get("confidence")

                try:
                    confidence = float(confidence)
                except (ValueError, TypeError):
                    continue

                min_action_confidence = min(min_action_confidence, confidence)
                max_action_confidence = max(max_action_confidence, confidence)

        # Bounds that were never tightened mean no usable confidence was found,
        # so store NULL instead of the sentinel value.
        if min_intent_confidence == 1.0:
            min_intent_confidence = None
        if max_intent_confidence == 0.0:
            max_intent_confidence = None
        if min_action_confidence == 1.0:
            min_action_confidence = None
        if max_action_confidence == 0.0:
            max_action_confidence = None

        query = (
            sa.update(conversation_table)
            .where(conversation_table.c.sender_id == conversation.sender_id)
            .values(
                minimum_intent_confidence=min_intent_confidence,
                maximum_intent_confidence=max_intent_confidence,
                minimum_action_confidence=min_action_confidence,
                maximum_action_confidence=max_action_confidence,
            )
        )
        session.execute(query)

    session.commit()
def upgrade():
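    """Split the old `entity_synonym` table into `EntitySynonym` (reference
    values) and `EntitySynonymValue` (the synonyms that map to them)."""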
    # New schema:
    #
    #   EntitySynonym:
    #       Contains the reference values (i.e. what synonyms will be converted into
    #       when found in a message by the NLU functions). The string will be stored in
    #       column "name". (example: "New York")
    #
    #   EntitySynonymValue:
    #       Contains synonyms for values in EntitySynonym. The string value will be
    #       stored in column "name". (examples: "NYC", "NY")
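    #
    #   Example: old rows mapping "NYC" -> "New York" and "NY" -> "New York"
    #   become one EntitySynonym row named "New York" plus two
    #   EntitySynonymValue rows ("NYC", "NY") referencing it.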

    session = Session(bind=op.get_bind())

    with warnings.catch_warnings():
        warnings.simplefilter("ignore", category=sqlalchemy.exc.SAWarning)
        # Add a unique ID to EntitySynonym
        with op.batch_alter_table("entity_synonym") as batch_op:
            batch_op.add_column(sa.Column("id", sa.Integer, primary_key=True))
            migration_utils.create_primary_key(
                batch_op, "entity_synonym", ["id"], "entity_synonym_pk"
            )

    # create sequence on this table to be used for the `id` column
    migration_utils.create_sequence("entity_synonym")

    # Create the new table for storing the entity synonym mapped values
    op.create_table(
        "entity_synonym_value",
        sa.Column("id", sa.Integer(), nullable=False),
        sa.Column("entity_synonym_id", sa.Integer(), nullable=True),
        sa.Column("name", sa.String(255), nullable=True),
        sa.ForeignKeyConstraint(["entity_synonym_id"], ["entity_synonym.id"],
                                ondelete="cascade"),
        sa.PrimaryKeyConstraint("id"),
    )

    # create sequence for this new table
    migration_utils.create_sequence("entity_synonym_value")

    min_ids = set()
    synonym_value_table = migration_utils.get_reflected_table(
        "entity_synonym_value", session)

    for row in session.execute("select * from entity_synonym"):
        # Each reference value can appear multiple times in the table, so take
        # the row with the smallest ID and treat that ID as the canonical one.
        min_id = session.scalar(
            "select min(id) from entity_synonym where synonym = :synonym",
            {"synonym": row["synonym"]},
        )
        min_ids.add(min_id)

        # Copy the synonyms previously stored in EntitySynonym over to
        # EntitySynonymValue
        insert_query = synonym_value_table.insert().values(
            entity_synonym_id=min_id, name=row["name"])
        session.execute(insert_query)

    with op.batch_alter_table("entity_synonym") as batch_op:
        # Make column "name" hold the reference value (currently it is in
        # "synonym"). The synonyms themselves have already been added to the
        # EntitySynonymValue table.
        batch_op.drop_column("name")
        batch_op.alter_column(
            "synonym",
            new_column_name="name",
            # The following arguments are required for MySQL
            existing_type=sa.String(),
            existing_server_default=None,
            existing_nullable=True,
        )

    # Now that the synonyms have been copied into EntitySynonymValue and the
    # EntitySynonym table has been restructured, delete all duplicate rows
    # from it.
    synonym_table = migration_utils.get_reflected_table(
        "entity_synonym", session)
    delete_query = synonym_table.delete().where(
        synonym_table.c.id.notin_(min_ids))
    session.execute(delete_query)

    with op.batch_alter_table("entity_synonym_value") as batch_op:
        batch_op.create_index(INDEX_NAME, ["name"])

    session.commit()
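

# All of the migrations above rely on a `migration_utils` helper module that is
# not included in this excerpt. Below is a rough, hypothetical sketch of its
# reflection helper, assuming it simply wraps SQLAlchemy table reflection; the
# real implementation may differ.

import sqlalchemy as sa
from sqlalchemy.orm import Session


def get_reflected_table(table_name: str, session: Session) -> sa.Table:
    # Reflect the live schema of `table_name` from the database the session is
    # bound to, so migrations see columns added by earlier revisions.
    return sa.Table(table_name, sa.MetaData(), autoload_with=session.get_bind())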