Example #1
def add_functions(json_payload, credentials=None):
    """
    Adds KPI functions to a given entity type.
    Uses the following APIs:
        POST /api/kpi/v1/{orgId}/entityType/{entityTypeName}/kpiFunction

    :param credentials: dict analytics-service dev credentials
    :param json_payload: JSON describing the KPI functions to add; expected format:
    ```
    {
        "entity_type_name": "sample_entity_type_name"
        "functions": [
        {
            "name": "RandomUniform", #a valid catalog function name
            # PARAMETERS REQUIRED FOR THE FUNCTION
            # For example bif.RandomUniform needs these additional parameters
            "parameters" :
            {
                "min_value" : 0.1,
                "max_value" : 0.2,
                "output_item" : "discharge_perc"
            }
        }
        ]
    }
    ```
    :return:
    """
    # 1. INPUT CHECKING
    logger.debug('Performing Input Checking')
    payload = validateJSON(json_payload)  # ensure the input is valid JSON
    validate(instance=payload,
             schema=create_kpifunction_schema)  # ensure the input matches the schema

    # 2. INPUT PARSING
    if 'entity_type_name' not in payload:
        raise Exception('No entity type was specified')

    functions = None
    if 'functions' in payload:
        functions = payload['functions']
        functions = parse_input_functions(functions, credentials=credentials)

    # 3. DATABASE CONNECTION
    # :description: to access the Watson IoT Platform Analytics DB.
    logger.debug('Connecting to Database')
    db = Database(credentials=credentials)

    # 4. CREATE CUSTOM ENTITY FROM JSON
    # 4.a Instantiate a custom entity type
    entity_type = BaseCustomEntityType(name=payload['entity_type_name'],
                                       db=db,
                                       functions=functions)
    # 4.b Publish KPIs so that the KPIs and constants appear in the UI
    entity_type.publish_kpis()

    # 5. CLOSE DB CONNECTION
    db.release_resource()

    return
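
A minimal usage sketch for add_functions, assuming the analytics-service dev credentials live in a local credentials.json file (the file name and all payload values below are hypothetical, built from the docstring example):

import json

# hypothetical file holding analytics-service dev credentials
with open('credentials.json', encoding='utf-8') as f:
    credentials = json.load(f)

# payload mirrors the docstring example: attach one catalog function
payload = json.dumps({
    "entity_type_name": "sample_entity_type_name",
    "functions": [{
        "name": "RandomUniform",
        "parameters": {
            "min_value": 0.1,
            "max_value": 0.2,
            "output_item": "discharge_perc"
        }
    }]
})

add_functions(payload, credentials=credentials)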
Example #2
def create_custom_entitytype(json_payload, credentials=None, **kwargs):
    """
    Creates an entity type from the given JSON payload.
    Uses the following APIs:
        POST /meta/v1/{orgId}/entityType
        POST /api/kpi/v1/{orgId}/entityType/{entity_type_name}/kpiFunctions/import
        POST /api/constants/v1/{orgId}

    :param json_payload: JSON describing the metadata required to create the desired entity type;
    the expected JSON schema is as follows:
    ```
        example_schema = {
            "type": "object",
            "properties": {
                "entity_type_name": {"type": "string"},
                "metrics": {"type": "array", "items": {"type": "object"}},
                "constants": {"type": "array", "items": {"type": "object"}},
                "dimensions": {"type": "array", "items": {"type": "object"}},
                "functions": {"type": "array", "items": {"type": "object"}},
                "metric_timestamp_column_name":{"type": "string"}
            },
            "required": ["entity_type_name"]
        }
    ```
    Example of the example_schema.metrics / example_schema.dimensions property:
    ```
    [{
        'name': 'metric_a',
        'datatype': 'str'
        # allowed column types: number, boolean, literal/string, timestamp
        # accepted datatypes: 'str'/'string', 'int'/'integer', 'number'/'float', 'datetime', 'bool'/'boolean'
    }]
    ```
    Example of the example_schema.constants property:
    ```
    [{
        'name': 'sample_constant_name',
        'datatype' : 'number',
        'value': 0.3,
        'default': 0.3,
        'description': 'optional'
        # accepted datatypes: 'str'/'string', 'int'/'integer', 'number'/'float', 'datetime', 'bool'/'boolean'
    }]
    ```
    Example of the example_schema.functions property:
    ```
    [{
        'name': 'RandomUniform',  # a valid catalog function name
        # PARAMETERS REQUIRED FOR THE FUNCTION
        # For example bif.RandomUniform needs these additional parameters
        'parameters':
        {
            'min_value' : 0.1,
            'max_value' : 0.2,
            'output_item' : 'discharge_perc'
        }
    }]
    ```
    :param credentials: dict analytics-service dev credentials
    :param **kwargs {
        drop_existing: bool delete the existing table and rebuild the entity type table in the DB
        db_schema: str if no schema is provided, uses the default schema
    }

    :return:
    """

    # 1. INPUT CHECKING
    logger.debug('Performing Input Checking')
    payload = validateJSON(json_payload)  # ensure the input is valid JSON
    validate(instance=payload,
             schema=create_custom_schema)  # ensure the input matches the schema

    # 2. INPUT PARSING
    metrics = None
    constants = None
    dimensions = None
    functions = None
    if 'metrics' in payload:
        metrics = payload['metrics']
        metrics = parse_input_columns(metrics)
    if 'constants' in payload:
        constants = payload['constants']
        constants = parse_input_constants(constants)
    if 'dimensions' in payload:
        dimensions = payload['dimensions']
        dimensions = parse_input_columns(dimensions)
    if 'functions' in payload:
        functions = payload['functions']
        functions = parse_input_functions(functions, credentials=credentials)

    # 3. DATABASE CONNECTION
    # :description: to access the Watson IoT Platform Analytics DB.
    logger.debug('Connecting to Database')
    db = Database(credentials=credentials)

    # 4. CREATE CUSTOM ENTITY FROM JSON
    # 4.a Instantiate a custom entity type
    # override the default _timestamp='evt_timestamp'
    if 'metric_timestamp_column_name' in payload:
        BaseCustomEntityType._timestamp = payload[
            'metric_timestamp_column_name']
    # TODO: BaseCustomEntityType.timestamp= add user defined timestamp column
    entity_type = BaseCustomEntityType(name=payload['entity_type_name'],
                                       db=db,
                                       columns=metrics,
                                       constants=constants,
                                       dimension_columns=dimensions,
                                       functions=functions,
                                       **kwargs)
    # 4.b Register the entity type so that it creates a table for input data and appears in the UI;
    # publish KPIs so that the KPIs and constants appear in the UI
    entity_type.register(publish_kpis=True)

    # 5. CLOSE DB CONNECTION
    db.release_resource()
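
A minimal usage sketch for create_custom_entitytype, assuming the same hypothetical credentials.json as above; the metric, constant, dimension, and function entries are illustrative values built from the docstring examples:

import json

# hypothetical file holding analytics-service dev credentials
with open('credentials.json', encoding='utf-8') as f:
    credentials = json.load(f)

payload = json.dumps({
    "entity_type_name": "sample_entity_type_name",
    "metrics": [{"name": "discharge_rate", "datatype": "number"}],
    "constants": [{"name": "sample_constant_name", "datatype": "number",
                   "value": 0.3, "default": 0.3}],
    "dimensions": [{"name": "site", "datatype": "str"}],
    "functions": [{"name": "RandomUniform",
                   "parameters": {"min_value": 0.1, "max_value": 0.2,
                                  "output_item": "discharge_perc"}}]
})

# drop_existing=True would drop and rebuild the entity type table in the DB
create_custom_entitytype(payload, credentials=credentials, drop_existing=False)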
Example #3
def load_metrics_data_from_csv(entity_type_name,
                               file_path,
                               credentials=None,
                               **kwargs):
    """
    Reads metrics data from a CSV file and stores it in the entity type's metrics table.
    Note: make sure the 'deviceid' and 'evt_timestamp' columns are present in the CSV;
    if 'evt_timestamp' is absent, it is inferred from another *_timestamp column or set to the current time

    :param entity_type_name: str name of the entity type to load data for
    :param file_path: str path to the CSV file
    :param credentials: dict analytics-service dev credentials
    :param **kwargs {
        db_schema: str if no schema is provided, uses the default schema
        if_exists: str default: 'append'
    }
    :return:
    """
    # load csv in dataframe
    df = pd.read_csv(file_path)

    # lower-case all column names (required columns are lower case)
    df.columns = map(str.lower, df.columns)

    # DATABASE CONNECTION
    # :description: to access the Watson IoT Platform Analytics DB.
    logger.debug('Connecting to Database')
    db = Database(credentials=credentials, entity_type=entity_type_name)
    # check if entity type table exists
    db_schema = kwargs.get('db_schema')  # use the default schema if none provided
    # get the entity type to add data to
    entity_type_metadata = db.entity_type_metadata.get(entity_type_name)
    logger.debug(entity_type_metadata)
    if entity_type_metadata is None:
        raise RuntimeError(
            f'No entity type {entity_type_name} found. '
            'Make sure you create the entity type before loading data from a CSV. '
            'Refer to create_custom_entitytype() to create the entity type first'
        )

    # find required columns
    timestamp_col_name = entity_type_metadata['metricTimestampColumn']
    logical_name = entity_type_metadata['name']
    table_name = db_table_name(entity_type_metadata['metricTableName'],
                               db.db_type)
    deviceid_col = 'deviceid'

    required_cols = db.get_column_names(table=table_name, schema=db_schema)
    missing_cols = list(set(required_cols) - set(df.columns))
    logger.debug(f'missing_cols : {missing_cols}')
    # Add data for missing columns that are required
    # required columns that can't be NULL: {'evt_timestamp', 'deviceid', 'updated_utc', 'devicetype', 'rcv_timestamp_utc'}
    for m in missing_cols:
        if m == timestamp_col_name or m == 'rcv_timestamp_utc':
            # get possible timestamp columns and select the first candidate
            df_timestamp = df.filter(like='_timestamp')
            if not df_timestamp.empty:
                df_timestamp_columns = df_timestamp.columns
                timestamp_col = df_timestamp_columns[0]
                df[m] = pd.to_datetime(df_timestamp[timestamp_col])
                logger.debug(
                    f'Inferred column {timestamp_col} as missing column {m}')
            else:
                df[m] = dt.datetime.utcnow() - dt.timedelta(seconds=15)
                logger.debug(
                    f'Adding data: current time to missing column {m}')
        elif m == 'devicetype':
            df[m] = logical_name
            logger.debug(f'Adding data: {logical_name} to missing column {m}')
        elif m == 'updated_utc':
            logger.debug(f'Adding data: current time to missing column {m}')
            df[m] = dt.datetime.utcnow() - dt.timedelta(seconds=15)
        elif m == deviceid_col:
            raise RuntimeError(f'Missing required column {m}')
        else:
            df[m] = None

    # DATA CHECKS
    # 1. Check pd.DataFrame data types against entitytype/database data types
    # coerce DataFrame object dtypes to the corresponding database data types
    # add None for missing columns (not added to the db)
    logger.debug(f'Dataframe columns before data check 1. {df.columns}')
    entity_type_columns = entity_type_metadata['dataItemDto']
    df = change_df_dtype_to_db_dtype(df, entity_type_columns)
    logger.debug(f'Dataframe columns after data check 1. {df.columns}')

    # 2. allowed deviceid values: alphanumeric + hyphen + underscore + period, between 1 and 36 characters long
    # drop rows with disallowed deviceid values
    logger.debug(
        f'Dataframe has {len(df.index)} rows of data before data check 2')
    df = df[df[deviceid_col].str.contains(r'^[A-Za-z0-9._-]+$')]
    df = df[df[deviceid_col].str.len() <= 36]
    logger.warning(
        'This function will ignore rows where deviceid has values that are not allowed'
    )
    logger.warning(
        '(NOTE) Allowed characters in a deviceid string are: alphanumeric/hyphen/underscore/period with '
        'a length of 1 to 36 characters')
    logger.debug(
        f'Dataframe has {len(df.index)} rows of data after data check 2')

    # remove columns that are not required / not in the entity type definition
    logger.debug(f'Keeping only required columns: {required_cols}')
    df = df[required_cols]
    logger.debug(f'Top 5 elements of the df written to the db: \n{df.head(5)}')
    # write the dataframe to the database table
    db.write_frame(df=df, table_name=table_name)
    logger.debug(
        f'Inserted {len(df.index)} rows of data into {table_name}'
    )

    # CLOSE DB CONNECTION
    db.release_resource()
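
A minimal usage sketch for load_metrics_data_from_csv, assuming the entity type above already exists and that metrics.csv (hypothetical) contains at least a deviceid column and one metric column:

import json

# hypothetical file holding analytics-service dev credentials
with open('credentials.json', encoding='utf-8') as f:
    credentials = json.load(f)

# column names are lower-cased on load; a missing evt_timestamp column is
# inferred from another *_timestamp column or filled with the current time
load_metrics_data_from_csv('sample_entity_type_name',
                           'metrics.csv',
                           credentials=credentials)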
def load_metrics_data_from_csv(entity_type_name,
                               file_path,
                               credentials=None,
                               **kwargs):
    """
    Reads metrics data from a CSV file and stores it in the entity type's metrics table.
    Note: make sure the 'deviceid' and 'evt_timestamp' columns are present in the CSV;
    if 'evt_timestamp' is absent, it is inferred from another *_timestamp column or set to the current time

    :param entity_type_name: str name of the entity type to load data for
    :param file_path: str path to the CSV file
    :param credentials: dict analytics-service dev credentials
    :param **kwargs {
        db_schema: str if no schema is provided, uses the default schema
        if_exists: str default: 'append'
    }
    :return:
    """
    # load csv in dataframe
    df = pd.read_csv(file_path)

    # lower-case all column names (required columns are lower case)
    df.columns = map(str.lower, df.columns)

    # DATABASE CONNECTION
    # :description: to access the Watson IoT Platform Analytics DB.
    logger.debug('Connecting to Database')
    db = Database(credentials=credentials)
    # check if entity type table exists
    db_schema = kwargs.get('db_schema')  # use the default schema if none provided
    # get the entity type to add data to
    try:
        entity_type = db.get_entity_type(entity_type_name)
    except Exception as exc:
        raise RuntimeError(
            f'No entity type {entity_type_name} found. '
            'Make sure you create the entity type before loading data from a CSV. '
            'Refer to create_custom_entitytype() to create the entity type first'
        ) from exc

    # find required columns
    required_cols = db.get_column_names(table=entity_type.name,
                                        schema=db_schema)
    missing_cols = list(set(required_cols) - set(df.columns))
    logger.debug(f'missing_cols : {missing_cols}')
    # Add data for missing columns that are required
    # required columns that can't be NULL: {'evt_timestamp', 'deviceid', 'updated_utc', 'devicetype'}
    for m in missing_cols:
        if m == entity_type._timestamp:
            # get possible timestamp columns and select the first candidate
            df_timestamp = df.filter(like='_timestamp')
            if not df_timestamp.empty:
                df_timestamp_columns = df_timestamp.columns
                timestamp_col = df_timestamp_columns[0]
                df[m] = pd.to_datetime(df_timestamp[timestamp_col])
                logger.debug(
                    f'Inferred column {timestamp_col} as missing column {m}')
            else:
                df[m] = dt.datetime.utcnow() - dt.timedelta(seconds=15)
                logger.debug(
                    f'Adding data: current time to missing column {m}')
        elif m == 'devicetype':
            df[m] = entity_type.logical_name
            logger.debug(
                f'Adding data: {entity_type.logical_name} to missing column {m}'
            )
        elif m == 'updated_utc':
            logger.debug(f'Adding data: current time to missing column {m}')
            df[m] = dt.datetime.utcnow() - dt.timedelta(seconds=15)
        elif m == entity_type._entity_id:
            raise RuntimeError(f'Missing required column {m}')
        else:
            df[m] = None

    # remove columns that are not required
    df = df[required_cols]
    # write the dataframe to the database table
    db.write_frame(df=df, table_name=entity_type.name)
    logger.debug(
        f'Inserted {len(df.index)} rows of data into {entity_type.name}'
    )

    # CLOSE DB CONNECTION
    db.release_resource()

    return
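
A self-contained sketch of the timestamp-inference step both variants above share: when the expected timestamp column is missing from the CSV, the first column whose name contains '_timestamp' is re-used; otherwise the current time (lagged by 15 seconds, as above) is substituted. The sample frame below is illustrative only.

import datetime as dt

import pandas as pd

df = pd.DataFrame({'deviceid': ['dev-1', 'dev-2'],
                   'local_timestamp': ['2021-01-01 00:00:00',
                                       '2021-01-01 00:05:00']})

missing = 'evt_timestamp'  # expected column the CSV did not provide
candidates = df.filter(like='_timestamp')  # columns named like '*_timestamp'
if not candidates.empty:
    # re-use the first candidate column as the event timestamp
    df[missing] = pd.to_datetime(candidates[candidates.columns[0]])
else:
    # no candidate column: fall back to a slightly lagged current time
    df[missing] = dt.datetime.utcnow() - dt.timedelta(seconds=15)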