示例#1
0
def process_nonpatient_table(table: Table, engine: Engine,
                             progargs: Any) -> None:
    if progargs.rcep:
        return
    log.info("Preprocessing non-patient table {}".format(repr(table.name)))
    pk_col = get_effective_int_pk_col(table)
    other_pk_col = pk_col if pk_col != CRATE_COL_PK else None
    if other_pk_col:  # table has a primary key already
        crate_pk_col = Column(CRATE_COL_PK, BigInteger, nullable=True)
    else:
        crate_pk_col = make_bigint_autoincrement_column(
            CRATE_COL_PK, engine.dialect)
    table.append_column(crate_pk_col)  # must be Table-bound, as above
    add_columns(engine, table, [crate_pk_col])
    if not progargs.print:
        ensure_columns_present(engine,
                               tablename=table.name,
                               column_names=[CRATE_COL_PK])
    if other_pk_col:
        execute(
            engine, """
            UPDATE {tablename} SET {crate_pk} = {rio_pk}
            WHERE {crate_pk} IS NULL
        """.format(tablename=table.name,
                   crate_pk=CRATE_COL_PK,
                   rio_pk=other_pk_col))
    add_indexes(engine, table, [{
        'index_name': CRATE_IDX_PK,
        'column': CRATE_COL_PK,
        'unique': True
    }])
示例#2
0
def process_master_patient_table(table: Table, engine: Engine,
                                 progargs: Any) -> None:
    crate_col_nhs_number = Column(CRATE_COL_NHS_NUMBER,
                                  BigInteger,
                                  nullable=True)
    table.append_column(crate_col_nhs_number)
    add_columns(engine, table, [crate_col_nhs_number])
    if progargs.rcep:
        nhscol = RCEP_COL_NHS_NUMBER
    else:
        nhscol = RIO_COL_NHS_NUMBER
    log.info("Table {}: updating column {}".format(repr(table.name),
                                                   repr(nhscol)))
    ensure_columns_present(engine, tablename=table.name, column_names=[nhscol])
    if not progargs.print:
        ensure_columns_present(engine,
                               tablename=table.name,
                               column_names=[CRATE_COL_NHS_NUMBER])
    execute(
        engine, """
        UPDATE {tablename} SET
            {nhs_number_int} = CAST({nhscol} AS BIGINT)
            WHERE {nhs_number_int} IS NULL
    """.format(
            tablename=table.name,
            nhs_number_int=CRATE_COL_NHS_NUMBER,
            nhscol=nhscol,
        ))
示例#3
0
def process_clindocs_table(table: Table, engine: Engine,
                           progargs: Any) -> None:
    # For RiO only, not RCEP
    crate_col_max_docver = Column(CRATE_COL_MAX_DOCVER, Integer, nullable=True)
    crate_col_last_doc = Column(CRATE_COL_LAST_DOC, Integer, nullable=True)
    table.append_column(crate_col_max_docver)
    table.append_column(crate_col_last_doc)
    add_columns(engine, table, [crate_col_max_docver, crate_col_last_doc])
    add_indexes(engine, table, [
        {
            'index_name':
            CRATE_IDX_RIONUM_SERIALNUM,
            'column':
            '{rio_number}, SerialNumber'.format(
                rio_number=CRATE_COL_RIO_NUMBER),
        },
        {
            'index_name': CRATE_IDX_MAX_DOCVER,
            'column': CRATE_COL_MAX_DOCVER,
        },
        {
            'index_name': CRATE_IDX_LAST_DOC,
            'column': CRATE_COL_LAST_DOC,
        },
    ])

    required_cols = ["SerialNumber", "RevisionID"]
    if not progargs.print:
        required_cols.extend(
            [CRATE_COL_MAX_DOCVER, CRATE_COL_LAST_DOC, CRATE_COL_RIO_NUMBER])
    ensure_columns_present(engine,
                           tablename=table.name,
                           column_names=required_cols)

    # Find the maximum SerialNumber for each note, and store it.
    # Slow query, even with index.
    log.info("Clinical documents table {}: updating {}".format(
        repr(table.name), repr(CRATE_COL_MAX_DOCVER)))
    execute(
        engine, """
        UPDATE p1
        SET p1.{max_docver_col} = subq.max_docver
        FROM {tablename} p1 JOIN (
            SELECT {rio_number}, SerialNumber, MAX(RevisionID) AS max_docver
            FROM {tablename} p2
            GROUP BY {rio_number}, SerialNumber
        ) subq
        ON subq.{rio_number} = p1.{rio_number}
        AND subq.SerialNumber = p1.SerialNumber
        WHERE p1.{max_docver_col} IS NULL
    """.format(
            max_docver_col=CRATE_COL_MAX_DOCVER,
            tablename=table.name,
            rio_number=CRATE_COL_RIO_NUMBER,
        ))

    # Set a single column accordingly
    log.info("Clinical documents table {}: updating {}".format(
        repr(table.name), repr(CRATE_COL_LAST_DOC)))
    execute(
        engine, """
        UPDATE {tablename} SET
            {last_doc_col} =
                CASE
                    WHEN RevisionID = {max_docver_col} THEN 1
                    ELSE 0
                END
        WHERE {last_doc_col} IS NULL
    """.format(
            tablename=table.name,
            last_doc_col=CRATE_COL_LAST_DOC,
            max_docver_col=CRATE_COL_MAX_DOCVER,
        ))
示例#4
0
def process_progress_notes(table: Table, engine: Engine,
                           progargs: Any) -> None:
    crate_col_max_subnum = Column(CRATE_COL_MAX_SUBNUM, Integer, nullable=True)
    crate_col_last_note = Column(CRATE_COL_LAST_NOTE, Integer, nullable=True)
    table.append_column(crate_col_max_subnum)
    table.append_column(crate_col_last_note)
    add_columns(engine, table, [crate_col_max_subnum, crate_col_last_note])
    # We're always in "RiO land", not "RCEP land", for this one.
    add_indexes(engine, table, [
        {  # Joint index, for JOIN in UPDATE statement below
            'index_name': CRATE_IDX_RIONUM_NOTENUM,
            'column': '{rio_number}, NoteNum'.format(
                rio_number=CRATE_COL_RIO_NUMBER),
        },
        {  # Speeds up WHERE below. (Much, much faster for second run.)
            'index_name': CRATE_IDX_MAX_SUBNUM,
            'column': CRATE_COL_MAX_SUBNUM,
        },
        {  # Speeds up WHERE below. (Much, much faster for second run.)
            'index_name': CRATE_IDX_LAST_NOTE,
            'column': CRATE_COL_LAST_NOTE,
        },
    ])

    ensure_columns_present(
        engine,
        tablename=table.name,
        column_names=["NoteNum", "SubNum", "EnteredInError", "EnteredInError"])
    if not progargs.print:
        ensure_columns_present(engine,
                               tablename=table.name,
                               column_names=[
                                   CRATE_COL_MAX_SUBNUM, CRATE_COL_LAST_NOTE,
                                   CRATE_COL_RIO_NUMBER
                               ])

    # Find the maximum SubNum for each note, and store it.
    # Slow query, even with index.
    log.info("Progress notes table {}: updating {}".format(
        repr(table.name), repr(CRATE_COL_MAX_SUBNUM)))
    execute(
        engine, """
        UPDATE p1
        SET p1.{max_subnum_col} = subq.max_subnum
        FROM {tablename} p1 JOIN (
            SELECT {rio_number}, NoteNum, MAX(SubNum) AS max_subnum
            FROM {tablename} p2
            GROUP BY {rio_number}, NoteNum
        ) subq
        ON subq.{rio_number} = p1.{rio_number}
        AND subq.NoteNum = p1.NoteNum
        WHERE p1.{max_subnum_col} IS NULL
    """.format(
            max_subnum_col=CRATE_COL_MAX_SUBNUM,
            tablename=table.name,
            rio_number=CRATE_COL_RIO_NUMBER,
        ))

    # Set a single column accordingly
    log.info("Progress notes table {}: updating {}".format(
        repr(table.name), repr(CRATE_COL_LAST_NOTE)))
    execute(
        engine, """
        UPDATE {tablename} SET
            {last_note_col} =
                CASE
                    WHEN SubNum = {max_subnum_col} THEN 1
                    ELSE 0
                END
        WHERE {last_note_col} IS NULL
    """.format(
            tablename=table.name,
            last_note_col=CRATE_COL_LAST_NOTE,
            max_subnum_col=CRATE_COL_MAX_SUBNUM,
        ))

    # Create a view, if we're on an RCEP database
    if progargs.rcep and progargs.cpft:
        select_sql = """
            SELECT *
            FROM {tablename}
            WHERE
                (EnteredInError <> 1 OR EnteredInError IS NULL)
                AND {last_note_col} = 1
        """.format(
            tablename=table.name,
            last_note_col=CRATE_COL_LAST_NOTE,
        )
        create_view(engine, VIEW_RCEP_CPFT_PROGRESS_NOTES_CURRENT, select_sql)
示例#5
0
def process_patient_table(table: Table, engine: Engine, progargs: Any) -> None:
    log.info("Preprocessing patient table: {}".format(repr(table.name)))
    rio_type = table_is_rio_type(table.name, progargs)
    if rio_type:
        pk_col = get_effective_int_pk_col(table)
        rio_pk = pk_col if pk_col != CRATE_COL_PK else None
        string_pt_id = get_rio_patient_id_col(table)
        required_cols = [string_pt_id]
    else:  # RCEP type
        rio_pk = None
        required_cols = [RCEP_COL_PATIENT_ID]
        string_pt_id = RCEP_COL_PATIENT_ID
    if not progargs.print:
        required_cols.extend([CRATE_COL_PK, CRATE_COL_RIO_NUMBER])

    # -------------------------------------------------------------------------
    # Add pk and rio_number columns, if not present
    # -------------------------------------------------------------------------
    if rio_type and rio_pk is not None:
        crate_pk_col = Column(CRATE_COL_PK, BigInteger, nullable=True)
        # ... can't do NOT NULL; need to populate it
        required_cols.append(rio_pk)
    else:  # RCEP type, or no PK in RiO
        crate_pk_col = make_bigint_autoincrement_column(
            CRATE_COL_PK, engine.dialect)
        # ... autopopulates
    crate_rio_number_col = Column(CRATE_COL_RIO_NUMBER,
                                  BigInteger,
                                  nullable=True)
    # ... even if RiO numbers are INT, they come from VARCHAR(15) here, and
    # that can (aod does) look numeric and overflow an INT.
    # SQL Server requires Table-bound columns in order to generate DDL:
    table.append_column(crate_pk_col)
    table.append_column(crate_rio_number_col)
    add_columns(engine, table, [crate_pk_col, crate_rio_number_col])

    # -------------------------------------------------------------------------
    # Update pk and rio_number values, if not NULL
    # -------------------------------------------------------------------------
    ensure_columns_present(engine,
                           tablename=table.name,
                           column_names=required_cols)
    cast_id_to_int = sql_fragment_cast_to_int(string_pt_id,
                                              dialect=engine.dialect)
    if rio_type and rio_pk:
        log.info("Table {}: updating columns {} and {}".format(
            repr(table.name), repr(CRATE_COL_PK), repr(CRATE_COL_RIO_NUMBER)))
        execute(
            engine, """
            UPDATE {tablename} SET
                {crate_pk} = {rio_pk},
                {crate_rio_number} = {cast_id_to_int}
            WHERE
                {crate_pk} IS NULL
                OR {crate_rio_number} IS NULL
        """.format(
                tablename=table.name,
                crate_pk=CRATE_COL_PK,
                rio_pk=rio_pk,
                crate_rio_number=CRATE_COL_RIO_NUMBER,
                cast_id_to_int=cast_id_to_int,
            ))
    else:
        # RCEP format, or RiO with no PK
        # crate_pk is autogenerated as an INT IDENTITY field
        log.info("Table {}: updating column {}".format(
            repr(table.name), repr(CRATE_COL_RIO_NUMBER)))
        execute(
            engine,
            """
            UPDATE {tablename} SET
                {crate_rio_number} = {cast_id_to_int}
            WHERE
                {crate_rio_number} IS NULL
        """.format(  # noqa
                tablename=table.name,
                crate_rio_number=CRATE_COL_RIO_NUMBER,
                cast_id_to_int=cast_id_to_int,
            ))
    # -------------------------------------------------------------------------
    # Add indexes, if absent
    # -------------------------------------------------------------------------
    # Note that the indexes are unlikely to speed up the WHERE NOT NULL search
    # above, so it doesn't matter that we add these last. Their use is for
    # the subsequent CRATE anonymisation table scans.
    add_indexes(engine, table, [
        {
            'index_name': CRATE_IDX_PK,
            'column': CRATE_COL_PK,
            'unique': True,
        },
        {
            'index_name': CRATE_IDX_RIONUM,
            'column': CRATE_COL_RIO_NUMBER,
        },
    ])