Example #1
def validate_target_using_key(target_data_dict, required_column_list,
                              recommended_column_list, expected_data_types):
    """Validating target data against required & recommended column names"""

    logger.info('Validating and updating records in MetadataLedger table for '
                'target data')
    len_target_metadata = len(target_data_dict)
    for ind in range(len_target_metadata):
        # Updating default validation for all records
        validation_result = 'Y'
        record_status_result = 'Active'

        # flattened target metadata created for reference
        flattened_source_data = dict_flatten(
            target_data_dict[ind]['target_metadata'], required_column_list)
        # validate for required values in data
        for item_name in required_column_list:
            # update validation and record status for invalid data
            # Log out error for missing required values
            if item_name in flattened_source_data:
                if not flattened_source_data[item_name]:
                    validation_result = 'N'
                    record_status_result = 'Inactive'
                    required_recommended_logs(ind, "Required", item_name)
            else:
                validation_result = 'N'
                record_status_result = 'Inactive'
                required_recommended_logs(ind, "Required", item_name)

        # validate for recommended values in data
        for item_name in recommended_column_list:
            # Log out warning for missing recommended values
            if item_name in flattened_source_data:
                if not flattened_source_data[item_name]:
                    required_recommended_logs(ind, "Recommended", item_name)
            else:
                required_recommended_logs(ind, "Recommended", item_name)
        # Type checking for values in metadata
        for item in flattened_source_data:
            # check if datatype has been assigned to field
            if item in expected_data_types:
                # type checking for datetime datatype fields
                if expected_data_types[item] == "datetime":
                    if not is_date(flattened_source_data[item]):
                        required_recommended_logs(ind, "datatype", item)
                # type checking for datatype fields(except datetime)
                elif (not isinstance(flattened_source_data[item],
                                     expected_data_types[item])):
                    required_recommended_logs(ind, "datatype", item)

        # assigning key hash value for target metadata
        key_value_hash = target_data_dict[ind]['target_metadata_key_hash']
        # Calling function to update validation status
        store_target_metadata_validation_status(
            target_data_dict, key_value_hash, validation_result,
            record_status_result, target_data_dict[ind]['target_metadata'])
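
A minimal usage sketch for the validator above, assuming dict_flatten produces dot-joined keys; the record shape is inferred from the field accesses in the function body, and all field names, values, and the hash are hypothetical:

# Hypothetical records shaped like the ones the function indexes into
sample_records = [{
    'target_metadata': {
        'Course': {'CourseTitle': 'Intro to Python', 'CourseCode': ''},
    },
    'target_metadata_key_hash': 'abc123',
}]

# CourseCode is empty, so the record is logged as missing a required
# value and stored with validation 'N' and record status 'Inactive'
validate_target_using_key(
    sample_records,
    required_column_list=['Course.CourseTitle', 'Course.CourseCode'],
    recommended_column_list=['Course.CourseDescription'],
    expected_data_types={'Course.CourseTitle': str})
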
Example #2
def create_target_metadata_dict(ind, target_mapping_dict, source_metadata,
                                required_column_list, expected_data_types):
    """Function to replace and transform source data into target data
    using the target mapping schema"""

    # Create dataframe using target metadata schema
    target_schema = pd.DataFrame.from_dict(
        target_mapping_dict,
        orient='index')

    # Flatten source data dictionary for replacing and transformation
    source_metadata = dict_flatten(source_metadata, required_column_list)

    # Updating null values with empty strings for replacing metadata
    source_metadata = {
        k: '' if not v else v for k, v in
        source_metadata.items()}

    # replacing fields to be overwritten or appended
    metadata_df = pd.DataFrame(source_metadata, index=[0])
    metadata = overwrite_metadata_field(metadata_df)

    # Replacing metadata schema with mapped values from source metadata
    target_schema_replaced = target_schema.replace(metadata)

    # Dropping index value and creating json object
    target_data = target_schema_replaced.apply(lambda x: [x.dropna()],
                                               axis=1).to_json()
    # Creating dataframe from json object
    target_data_df = pd.read_json(target_data)

    # transforming target dataframe to dictionary object for replacing
    # values in target with new value
    target_data_dict = target_data_df.to_dict(orient='index')

    # type checking and explicit type conversion of metadata
    target_data_dict = type_checking_target_metadata(ind, target_data_dict,
                                                     expected_data_types)

    # Send already-mapped values to be skipped while creating supplemental
    # data
    supplemental_metadata = \
        create_supplemental_metadata(target_schema.values.tolist(), metadata)

    return target_data_dict, supplemental_metadata
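
The replace step above is the core of the transformation: the target schema holds source field names as placeholder values, and pandas DataFrame.replace swaps each placeholder for the corresponding flattened source value. A minimal, self-contained illustration of that mechanic (the field names and values here are hypothetical):

import pandas as pd

# Target schema: target field -> name of the source field to pull from
target_mapping_dict = {
    'Course': {'CourseTitle': 'title', 'CourseCode': 'code'},
}
target_schema = pd.DataFrame.from_dict(target_mapping_dict, orient='index')

# Flattened source metadata: source field -> value
metadata = {'title': 'Intro to Python', 'code': 'PY101'}

# Every cell holding a source field name is replaced by its value
print(target_schema.replace(metadata))
#             CourseTitle CourseCode
# Course  Intro to Python      PY101
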
Example #3
def get_data_types_for_validation(schema_data_dict):
    """Creating list of fields with the expected datatype objects"""

    # Call function to flatten schema used for validation
    flattened_schema_dict = dict_flatten(schema_data_dict, [])

    # mapping from string to datatype objects
    datatype_to_object = {"int": int, "str": str, "bool": bool}
    expected_data_types = dict()

    #  updating dictionary with expected datatype values for fields in metadata
    for column, value in flattened_schema_dict.items():
        if column.endswith(".data_type"):
            key = column[:-len(".data_type")]
            if value in datatype_to_object:
                value = datatype_to_object[value]
            expected_data_types.update({key: value})

    # Returning expected datatype mapping for validation
    return expected_data_types
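
A short sketch of what this helper yields for a hypothetical schema, assuming dict_flatten produces dot-joined keys. Note that "datetime" is not in the mapping, so it stays a string, which is exactly the special case the datetime branch in validate_target_using_key checks for:

schema = {
    'Course': {
        'CourseTitle': {'data_type': 'str'},
        'StartDate': {'data_type': 'datetime'},
    },
}
expected = get_data_types_for_validation(schema)
# expected == {'Course.CourseTitle': str, 'Course.StartDate': 'datetime'}
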
Example #4
def validate_source_using_key(source_data_dict, required_column_list,
                              recommended_column_list):
    """Validating source data against required & recommended column names"""

    logger.info("Validating and updating records in MetadataLedger table for "
                "source data")
    len_source_metadata = len(source_data_dict)
    for ind in range(len_source_metadata):
        # Updating default validation for all records
        validation_result = 'Y'
        record_status_result = 'Active'

        # flattened source data created for reference
        flattened_source_data = dict_flatten(
            source_data_dict[ind]['source_metadata'], required_column_list)
        # validate for required values in data
        for item in required_column_list:
            # update validation and record status for invalid data
            # Log out error for missing required values
            if item in flattened_source_data:
                if not flattened_source_data[item]:
                    validation_result = 'N'
                    record_status_result = 'Inactive'
                    required_recommended_logs(ind, "Required", item)
            else:
                validation_result = 'N'
                record_status_result = 'Inactive'
                required_recommended_logs(ind, "Required", item)

        # validate for recommended values in data
        for item in recommended_column_list:
            # Log out warning for missing recommended values
            if item in flattened_source_data:
                if not flattened_source_data[item]:
                    required_recommended_logs(ind, "Recommended", item)
            else:
                required_recommended_logs(ind, "Recommended", item)
        # assigning key hash value for source metadata
        key_value_hash = source_data_dict[ind]['source_metadata_key_hash']
        # Calling function to update validation status
        store_source_metadata_validation_status(
            source_data_dict, key_value_hash, validation_result,
            record_status_result, source_data_dict[ind]['source_metadata'])
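
These helpers presumably compose into a single validation pass; a hedged sketch of the likely call order, with schema_data_dict and source_data_dict standing in for real inputs and get_required_fields_for_validation defined in Example #5 below:

# Derive the column lists from the schema, then validate the records
required, recommended = get_required_fields_for_validation(schema_data_dict)
validate_source_using_key(source_data_dict, required, recommended)
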
Example #5
def get_required_fields_for_validation(schema_data_dict):
    """Creating list of fields which are Required & Recommended"""

    # Call function to flatten schema used for validation
    flattened_schema_dict = dict_flatten(schema_data_dict, [])

    # Declare list for required and recommended column names
    required_column_list = list()
    recommended_column_list = list()

    # Adding values to required and recommended list based on schema
    for column, value in flattened_schema_dict.items():
        if column.endswith(".use"):
            column = column[:-len(".use")]
        if value == "Required":
            required_column_list.append(column)
        elif value == "Recommended":
            recommended_column_list.append(column)

    # Returning required and recommended list for validation
    return required_column_list, recommended_column_list
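
A short sketch of the expected output for a hypothetical schema, again assuming dot-joined flattening; the ".use" suffix marking each field's usage level is stripped before the field name is collected:

schema = {
    'Course': {
        'CourseTitle': {'use': 'Required'},
        'CourseDescription': {'use': 'Recommended'},
    },
}
required, recommended = get_required_fields_for_validation(schema)
# required    == ['Course.CourseTitle']
# recommended == ['Course.CourseDescription']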