Exemplo n.º 1
0
def migrate_organization_alias(connections):
    '''
    Extract: Pull aliases along with the corresponding taxonomies
    Transform: Synthesize the value field using the
        :func:nppes_data_generators.names.organizations.synthetic_org_name_generator
    Load: Push synthesized aliases to scrubbed DB

    :param connections: Two-connection tuple to know where to pull the data from and here to push it back to
    :return: Just(connections) if everything went well else Nothing()
    '''
    extract_stmt = """
        SELECT COALESCE(GROUP_CONCAT(DISTINCT t.value), ''), a.* FROM spd_small.organization_alias a
        JOIN organization_taxonomy t ON a.organization_id = t.organization_id
        GROUP BY a.organization_id
        ORDER BY a.value;
    """

    load_stmt = 'INSERT INTO organization_alias VALUES ({});'.format(
        ('%s,' * 6)[:-1])

    def transform(_, aliases):
        name_synthesizer = synthetic_org_name_generator()

        def scrub(record):
            taxonomies, organization_alias_id, period_start, period_end, value, *rest = record
            return (organization_alias_id, period_start, period_end,
                    name_synthesizer(
                        value,
                        *taxonomies.split(',') if taxonomies else []), *rest)

        return Just(map(scrub, aliases))

    return SQLJob(extract_stmt, transform, load_stmt).run(connections)
Exemplo n.º 2
0
def migrate_vhdir_insurance_plan():
    '''
    Extract: Pull all addresses from original DB
    Transform: Synthesize address line1 for each address
    Load: Push synthesized addresses to scrubbed DB

    :param connections: Two-connection tuple to know where to pull the data from and here to push it back to
    :return: Just(connections) if everything went well else Nothing()
    '''
    extract_stmt = 'SELECT * FROM vhdir_insurance_plan;'

    load_stmt = 'INSERT INTO vhdir_insurance_plan VALUES ({});'.format(
        ('%s,' * 10)[:-1])

    def transform(_, plans):
        name_synthesizer = synthetic_insurance_plan_name_generator()

        def scrub_plan(idx_and_record):
            idx, record = idx_and_record
            ip_id, md_id, status, name, *rest = record
            return (ip_id, md_id, status, name_synthesizer(idx, name), *rest)

        return Just(map(scrub_plan, enumerate(plans)))

    return SQLJob(extract_stmt, transform, load_stmt)
Exemplo n.º 3
0
def migrate_identifier(entity_id, system, transformer):
    '''
    Migrate the identifier table for the given entity and system. This function is meant to be called by the entity
    for which we want to synthesize identifiers.

    :param entity_id: Entity to migrate identifiers for (ex. vhdir_organization, vhdir_practitioner...)
    :param system: Identifier system use for these identifiers (ex. http://hl7.org/fhir/sid/us-npi)
    :param transformer: Not all identifiers are synthesized the same way depending on it's context. The entity calling
    this method will tell which one to use.
    :return: Just(connections) if everything went well else Nothing()
    '''
    extract_stmt = """
                    SELECT {0}, identifier_id, identifier_status, identifier_status_value_code, `use`, system, 
                        type_cc_id, period_start, period_end, value
                    FROM identifier 
                    WHERE {0} IS NOT NULL 
                    AND system='{1}';
                    """.format(entity_id, system)
    load_stmt = """
                INSERT INTO identifier ({}, identifier_id, identifier_status, identifier_status_value_code, `use`, 
                    system, type_cc_id, period_start, period_end, value) 
                VALUES ({});
                """.format(entity_id, ('%s,'*10)[:-1])

    return SQLJob(extract_stmt, transformer(entity_id), load_stmt)
Exemplo n.º 4
0
def migrate_vhdir_network():
    '''
    Extract: Pull all networks from original DB
    Transform: Synthesize both name and alias for each network using
        :func:nppes_data_generators.names.networks.synthetic_network_name_generator
    Load: Push synthesized networks to scrubbed DB

    :param connections: Two-connection tuple to know where to pull the data from and here to push it back to
    :return: Just(connections) if everything went well else Nothing()
    '''
    extract_stmt = """SELECT n.network_id, n.meta_data_id, n.active, n.period_start, n.period_end, 
                        n.part_of_resource_reference_id, n.practitioner_role_id, n.organization_affiliation_id, 
                        n.coverage_id, n.plan_id, n.alias, n.name, a.state 
                      FROM vhdir_network n 
                      JOIN (SELECT network_id, COALESCE(GROUP_CONCAT(state)) AS state 
                            FROM address GROUP BY network_id) a 
                      ON n.network_id=a.network_id 
                      ORDER BY a.state;"""

    load_stmt = """INSERT INTO vhdir_network (network_id, meta_data_id, active, period_start, period_end, 
                    part_of_resource_reference_id, practitioner_role_id, organization_affiliation_id, coverage_id, 
                    plan_id, name, alias) VALUES ({})""".format(('%s,'*12)[:-1])

    def transform(_, networks):
        name_synthesizer = synthetic_network_name_generator

        by_state = (_[1] + (_[0],) for group in groupby(networks, lambda n: n[-1]) for _ in enumerate(group[1]))
        with_name = (_[:-4] + (name_synthesizer(_[-3], _[-2], _[-1]), name_synthesizer(_[-4], _[-2], _[-1])) for _ in
                     by_state)

        return Just(with_name)

    return SQLJob(extract_stmt, transform, load_stmt)
Exemplo n.º 5
0
    def run(connections):
        from_, to = connections

        return SQLPipeline(SQLJob(
            '', lambda cursor, params: Just(None),
            'INSERT INTO {2}.{0} SELECT * FROM {1}.{0};'.format(
                table, from_.database, to.database)),
                           setup=setup,
                           teardown=teardown).run(connections)
Exemplo n.º 6
0
def migrate_vhdir_organization():
    '''
    Extract: Pull all organizations from original DB, along with their taxonomies
    Transform: Synthesize the name field using the
        :func:nppes_data_generators.names.organizations.synthetic_org_name_generator, then attach the name of the
        parent organization if any
    Load: Push synthesized organizations to scrubbed DB

    :param connections: Two-connection tuple to know where to pull the data from and here to push it back to
    :return: Just(connections) if everything went well else Nothing()
    '''
    extract_stmt = """
            SELECT o.organization_id, o.name, COALESCE(GROUP_CONCAT(DISTINCT t.value), ''), 
                active, partOf_organization_id
            FROM vhdir_organization o
            LEFT JOIN organization_taxonomy t ON o.organization_id = t.organization_id
            GROUP BY o.organization_id
            ORDER BY name;
            """
    load_stmt = """INSERT INTO vhdir_organization (organization_id, name, active, partOf_organization_id, 
                partOf_organization_name) VALUES (%s, %s, %s, %s, %s);"""

    def transform(_, orgs):
        name_synthesizer = synthetic_org_name_generator()

        def scrub_org(org):
            organization_id, name, taxonomies, active, partOf_organization_id = org
            if organization_id % 10000 == 0:
                print(organization_id)
            return organization_id, name_synthesizer(name, *taxonomies.split(',') if taxonomies else []), active, \
                   partOf_organization_id

        @toolz.curry
        def attach_partOf_name(d, org):
            partOf_name = (d.get(org[-1])[1], ) if d.get(org[-1]) else (None, )
            return org + partOf_name

        # Keep dict of generated names to attach parent org. names afterward
        without_partOf_name = {_[0]: _ for _ in map(scrub_org, orgs)}
        with_partOf_name = map(attach_partOf_name(without_partOf_name),
                               without_partOf_name.values())
        return Just(with_partOf_name)

    return SQLJob(extract_stmt, transform, load_stmt)
Exemplo n.º 7
0
def migrate_telecom(connections):
    '''
    Extract: Pull all telecom records from original DB
    Transform: Synthesize the value field using :func:nppes_data_generators.phone_numbers.phone_number.synthetic_number
    Load: Push synthesized records to scrubbed DB

    :param connections: Two-connection tuple to know where to pull the data from and here to push it back to
    :return: Just(connections) if everything went well else Nothing()
    '''
    extract_stmt = 'SELECT * FROM telecom;'
    load_stmt = 'INSERT INTO telecom VALUES ({});'.format(('%s,' * 16)[:-1])

    def transform(_, telecoms):
        def scrub_number(telecom):
            telecom_id, system, value, *rest = telecom
            return (telecom_id, system, synthetic_number(value), *rest)

        res = Just(list(map(scrub_number, telecoms)))

        return res

    return SQLJob(extract_stmt, transform, load_stmt).run(connections)
Exemplo n.º 8
0
def migrate_address(connections):
    '''
    Extract: Pull all addresses from original DB
    Transform: Synthesize address line1 for each address
    Load: Push synthesized addresses to scrubbed DB

    :param connections: Two-connection tuple to know where to pull the data from and here to push it back to
    :return: Just(connections) if everything went well else Nothing()
    '''
    extract_stmt = 'SELECT * FROM address;'
    load_stmt = 'INSERT INTO address VALUES ({});'.format(('%s,' * 18)[:-1])

    def transform(_, addresses):
        address_line_generator = synthetic_address_line_generator()

        def scrub_line1(record):
            address_id, use, type, text, line1, *rest = record
            return (address_id, use, type, text, address_line_generator(line1),
                    *rest)

        return Just(map(scrub_line1, addresses))

    return SQLJob(extract_stmt, transform, load_stmt).run(connections)
Exemplo n.º 9
0
def migrate_name(connections):
    '''
    Extract: Pull all names from original DB
    Transform: Synthesize first and last names for each record
    Load: Push synthesized names to scrubbed DB

    :param connections: Two-connection tuple to know where to pull the data from and here to push it back to
    :return: Just(connections) if everything went well else Nothing()
    '''
    extract_stmt = 'SELECT * FROM name;'
    load_stmt = 'INSERT INTO name VALUES ({});'.format(('%s,' * 10)[:-1])

    def transform(_, names):
        first_name_generator = synthetic_first_name_generator()
        last_name_generator = synthetic_last_name_generator()

        def scrub_name(record):
            name_id, use, text, family, given, *rest = record
            return (name_id, use, text, last_name_generator(family),
                    first_name_generator(given), *rest)

        return Just(map(scrub_name, names))

    return SQLJob(extract_stmt, transform, load_stmt).run(connections)