Example #1
def build_packages(python_packages, bucket):
    cmd_partial = {}
    build_order = get_build_order(python_packages)
    for package_name in build_order:
        python_package = python_packages[package_name]
        if package_name == "requirements.txt":
            requirements_path = os.path.join(LOCAL_PACKAGE_PATH, package_name)
            aws.download_file_from_s3(python_package["src_key"],
                                      requirements_path, bucket)
            cmd_partial[package_name] = "-r " + requirements_path
        else:
            aws.download_and_extract_zip(python_package["src_key"],
                                         LOCAL_PACKAGE_PATH, bucket)
            cmd_partial[package_name] = os.path.join(LOCAL_PACKAGE_PATH,
                                                     package_name)

    logger.info("Setting up packages")

    restricted_packages = get_restricted_packages()

    for package_name in build_order:
        package_wheel_path = os.path.join(WHEELHOUSE_PATH, package_name)
        requirement = cmd_partial[package_name]
        logger.info("Building: {}".format(package_name))
        completed_process = run("pip3 wheel -w {} {}".format(
            package_wheel_path, requirement).split())

        if completed_process.returncode != 0:
            raise UserException("creating wheels", package_name)

        for wheelname in os.listdir(package_wheel_path):
            name_split = wheelname.split("-")
            dist_name, version = name_split[0], name_split[1]
            expected_version = restricted_packages.get(dist_name, None)
            if expected_version is not None and version != expected_version:
                raise UserException(
                    "when installing {}, found {}=={} but cortex requires {}=={}"
                    .format(package_name, dist_name, version, dist_name,
                            expected_version))

    logger.info("Validating packages")

    for package_name in build_order:
        requirement = cmd_partial[package_name]
        logger.info("Installing: {}".format(package_name))
        completed_process = run(
            "pip3 install --no-index --find-links={} {}".format(
                os.path.join(WHEELHOUSE_PATH, package_name),
                requirement).split())
        if completed_process.returncode != 0:
            raise UserException("installing package", package_name)

    logger.info("Caching built packages")

    for package_name in build_order:
        aws.compress_zip_and_upload(
            os.path.join(WHEELHOUSE_PATH, package_name),
            python_packages[package_name]["package_key"],
            bucket,
        )
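
The wheel-then-install flow above can be reproduced locally without S3; a minimal sketch, assuming only that pip3 is on the PATH (the temporary wheelhouse and the requirements file path are illustrative):

import subprocess
import tempfile

def build_and_install(requirements_path):
    # Build wheels for every requirement into a throwaway wheelhouse directory.
    wheelhouse = tempfile.mkdtemp(prefix="wheelhouse-")
    subprocess.run(["pip3", "wheel", "-w", wheelhouse, "-r", requirements_path],
                   check=True)
    # Install strictly from the wheelhouse, never touching the package index,
    # mirroring the --no-index --find-links step above.
    subprocess.run(["pip3", "install", "--no-index",
                    "--find-links={}".format(wheelhouse), "-r", requirements_path],
                   check=True)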
Example #2
def run_custom_aggregator(aggregator_resource, df, ctx, spark):
    aggregator = ctx.aggregators[aggregator_resource["aggregator"]]
    aggregate_name = aggregator_resource["name"]
    aggregator_impl, _ = ctx.get_aggregator_impl(aggregate_name)
    input_schema = aggregator_resource["inputs"]
    aggregator_column_input = input_schema["columns"]
    args = {}
    if input_schema.get("args") is not None and len(input_schema["args"]) > 0:
        args = ctx.populate_args(input_schema["args"])
    try:
        result = aggregator_impl.aggregate_spark(df, aggregator_column_input,
                                                 args)
    except Exception as e:
        raise UserRuntimeException(
            "aggregate " + aggregator_resource["name"],
            "aggregator " + aggregator["name"],
            "function aggregate_spark",
        ) from e

    if not util.validate_value_type(result, aggregator["output_type"]):
        raise UserException(
            "aggregate " + aggregator_resource["name"],
            "aggregator " + aggregator["name"],
            "type of {} is not {}".format(
                util.str_rep(util.pp_str(result), truncate=100),
                aggregator["output_type"]),
        )

    ctx.store_aggregate_result(result, aggregator_resource)
    return result
Example #3
def transform_sample(sample):
    ctx = local_cache["ctx"]
    model = local_cache["model"]

    transformed_sample = {}

    for column_name in ctx.extract_column_names(model["input"]):
        if ctx.is_raw_column(column_name):
            transformed_value = sample[column_name]
        else:
            transformed_column = ctx.transformed_columns[column_name]
            trans_impl = local_cache["trans_impls"][column_name]
            if not hasattr(trans_impl, "transform_python"):
                raise UserException(
                    "transformed column " + column_name,
                    "transformer " + transformed_column["transformer"],
                    "transform_python function is missing",
                )
            input = ctx.populate_values(transformed_column["input"],
                                        None,
                                        preserve_column_refs=True)
            transformer_input = create_transformer_inputs_from_map(
                input, sample)
            transformed_value = trans_impl.transform_python(transformer_input)

        transformed_sample[column_name] = transformed_value

    return transformed_sample
Example #4
def ingest(ctx, spark):
    if ctx.environment["data"]["type"] == "csv":
        df = read_csv(ctx, spark)
    elif ctx.environment["data"]["type"] == "parquet":
        df = read_parquet(ctx, spark)

    input_type_map = {f.name: f.dataType for f in df.schema}

    for raw_column_name in ctx.raw_columns.keys():
        raw_column = ctx.raw_columns[raw_column_name]
        expected_types = CORTEX_TYPE_TO_ACCEPTABLE_SPARK_TYPES[
            raw_column["type"]]
        actual_type = input_type_map[raw_column_name]
        if actual_type not in expected_types:
            logger.error("found schema:")
            log_df_schema(df, logger.error)

            raise UserException(
                "raw column " + raw_column_name,
                "type mismatch",
                "expected {} but found {}".format(
                    " or ".join(str(x) for x in expected_types), actual_type),
            )
        target_type = CORTEX_TYPE_TO_SPARK_TYPE[raw_column["type"]]

        if target_type != actual_type:
            df = df.withColumn(raw_column_name,
                               F.col(raw_column_name).cast(target_type))

    return df.select(*sorted(df.columns))
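
The cast applied above (replacing a column with its value cast to the target Spark type) can be seen in isolation; a minimal sketch, assuming a local PySpark installation (the column name and types are illustrative):

from pyspark.sql import SparkSession, functions as F
from pyspark.sql.types import LongType

spark = SparkSession.builder.master("local[1]").appName("cast-sketch").getOrCreate()

# A column ingested as an int that the schema expects as a long.
df = spark.createDataFrame([(1,), (2,)], ["age"])

# withColumn with the same name overwrites the column with the cast value,
# which is what ingest() does for each raw column whose type differs.
df = df.withColumn("age", F.col("age").cast(LongType()))
df.printSchema()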
Example #5
def transform_features(raw_features):
    ctx = local_cache["ctx"]
    model = local_cache["model"]

    transformed_features = {}

    for feature_name in model["features"]:
        if ctx.is_raw_feature(feature_name):
            transformed_feature = raw_features[feature_name]
        else:
            inputs = ctx.create_inputs_from_features_map(
                raw_features, feature_name)
            trans_impl = local_cache["trans_impls"][feature_name]
            if not hasattr(trans_impl, "transform_python"):
                raise UserException(
                    "transformed feature " + feature_name,
                    "transformer " +
                    ctx.transformed_features[feature_name]["transformer"],
                    "transform_python function missing",
                )

            args = local_cache["transform_args_cache"].get(feature_name, {})
            transformed_feature = trans_impl.transform_python(inputs, args)
        transformed_features[feature_name] = transformed_feature

    return transformed_features
Example #6
def run_custom_aggregator(aggregate, df, ctx, spark):
    aggregator = ctx.aggregators[aggregate["aggregator"]]
    aggregator_impl, _ = ctx.get_aggregator_impl(aggregate["name"])

    try:
        input = ctx.populate_values(aggregate["input"],
                                    aggregator["input"],
                                    preserve_column_refs=False)
    except CortexException as e:
        e.wrap("aggregate " + aggregate["name"], "input")
        raise

    try:
        result = aggregator_impl.aggregate_spark(df, input)
    except Exception as e:
        raise UserRuntimeException(
            "aggregate " + aggregate["name"],
            "aggregator " + aggregator["name"],
            "function aggregate_spark",
        ) from e

    if aggregator.get(
            "output_type") is not None and not util.validate_output_type(
                result, aggregator["output_type"]):
        raise UserException(
            "aggregate " + aggregate["name"],
            "aggregator " + aggregator["name"],
            "unsupported return type (expected type {}, got {})".format(
                util.data_type_str(aggregator["output_type"]),
                util.user_obj_str(result)),
        )

    result = util.cast_output_type(result, aggregator["output_type"])
    ctx.store_aggregate_result(result, aggregate)
    return result
Example #7
def transform_sample(sample):
    ctx = local_cache["ctx"]
    model = local_cache["model"]

    transformed_sample = {}

    for column_name in model["feature_columns"]:
        if ctx.is_raw_column(column_name):
            transformed_value = sample[column_name]
        else:
            inputs = ctx.create_column_inputs_map(sample, column_name)
            trans_impl = local_cache["trans_impls"][column_name]
            if not hasattr(trans_impl, "transform_python"):
                raise UserException(
                    "transformed column " + column_name,
                    "transformer " +
                    ctx.transformed_sample[column_name]["transformer"],
                    "transform_python function missing",
                )

            args = local_cache["transform_args_cache"].get(column_name, {})
            transformed_value = trans_impl.transform_python(inputs, args)
        transformed_sample[column_name] = transformed_value

    return transformed_sample
Example #8
def install_packages(python_packages, bucket):
    build_order = get_build_order(python_packages)

    for package_name in build_order:
        python_package = python_packages[package_name]
        aws.download_and_extract_zip(
            python_package["package_key"],
            os.path.join(WHEELHOUSE_PATH, package_name), bucket)

    if "requirements.txt" in python_packages:
        aws.download_file_from_s3(
            python_packages["requirements.txt"]["src_key"],
            "/requirements.txt", bucket)

    for package_name in build_order:
        cmd = package_name
        if package_name == "requirements.txt":
            cmd = "-r /requirements.txt"

        completed_process = run(
            "pip3 install --no-cache-dir --no-index --find-links={} {}".format(
                os.path.join(WHEELHOUSE_PATH, package_name), cmd).split())
        if completed_process.returncode != 0:
            raise UserException("installing package", package_name)

    util.rm_file("/requirements.txt")
    util.rm_dir(WHEELHOUSE_PATH)
Example #9
def build_packages(python_packages, bucket):
    cmd_partial = {}
    build_order = get_build_order(python_packages)
    for package_name in build_order:
        python_package = python_packages[package_name]
        if package_name == "requirements.txt":
            requirements_path = os.path.join(LOCAL_PACKAGE_PATH, package_name)
            aws.download_file_from_s3(python_package["src_key"],
                                      requirements_path, bucket)
            cmd_partial[package_name] = "-r " + requirements_path
        else:
            aws.download_and_extract_zip(python_package["src_key"],
                                         LOCAL_PACKAGE_PATH, bucket)
            cmd_partial[package_name] = os.path.join(LOCAL_PACKAGE_PATH,
                                                     package_name)

    logger.info("Setting up packages")

    for package_name in build_order:
        requirement = cmd_partial[package_name]
        logger.info("Building package {}".format(package_name))
        completed_process = run("pip3 wheel -w {} {}".format(
            os.path.join(WHEELHOUSE_PATH, package_name), requirement).split())
        if completed_process.returncode != 0:
            raise UserException("creating wheels", package_name)

    logger.info("Validating packages")

    for package_name in build_order:
        requirement = cmd_partial[package_name]
        logger.info("Installing package {}".format(package_name))
        completed_process = run(
            "pip3 install --no-index --find-links={} {}".format(
                os.path.join(WHEELHOUSE_PATH, package_name),
                requirement).split())
        if completed_process.returncode != 0:
            raise UserException("installing package", package_name)

    logger.info("Caching built packages")

    for package_name in build_order:
        aws.compress_zip_and_upload(
            os.path.join(WHEELHOUSE_PATH, package_name),
            python_packages[package_name]["package_key"],
            bucket,
        )
Example #10
def validate_dataset(ctx, raw_df, cols_to_validate):
    total_row_count = ctx.get_metadata(ctx.raw_dataset["key"])["dataset_size"]
    conditions_dict = spark_util.value_check_data(ctx, raw_df,
                                                  cols_to_validate)

    if len(conditions_dict) > 0:
        for column, cond_count_list in conditions_dict.items():
            for condition, fail_count in cond_count_list:
                logger.error(
                    "Data validation {} has been violated in {}/{} samples".
                    format(condition, fail_count, total_row_count))
        raise UserException("raw column validations failed")
Example #11
        def _transform_and_validate(*values):
            result = _transform(*values)
            if not util.validate_column_type(result,
                                             transformed_column["type"]):
                raise UserException(
                    "transformed column " + column_name,
                    "tranformation " + transformed_column["transformer"],
                    "type of {} is not {}".format(result,
                                                  transformed_column["type"]),
                )

            return result
Example #12
def _validate_required_fn_args(impl, fn_name, args):
    fn = getattr(impl, fn_name, None)
    if not fn:
        raise UserException("function " + fn_name, "could not find function")

    if not callable(fn):
        raise UserException("function " + fn_name, "not a function")

    argspec = inspect.getfullargspec(fn)
    if argspec.varargs is not None or argspec.varkw is not None or argspec.defaults is not None:
        raise UserException(
            "function " + fn_name,
            "invalid function signature, can only accept positional arguments",
        )

    if args:
        if argspec.args != args:
            raise UserException(
                "function " + fn_name,
                "expected function arguments arguments ({}) but found ({})".
                format(", ".join(args), ", ".join(argspec.args)),
            )
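
The signature checks above can be reproduced with the standard library alone; a minimal standalone sketch using inspect.getfullargspec (the sample functions are illustrative):

import inspect

def good(df, args):              # positional arguments only -- passes the checks
    return df

def bad(df, *rest, **kwargs):    # varargs and keyword arguments -- would be rejected
    return df

for fn in (good, bad):
    spec = inspect.getfullargspec(fn)
    ok = spec.varargs is None and spec.varkw is None and spec.defaults is None
    print(fn.__name__, "args:", spec.args, "positional-only signature:", ok)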
Example #13
def validate_model_dir(model_dir):
    """
    validates that model_dir has the expected directory tree.

    For example (your TF serving version number may be different):

    1562353043/
        saved_model.pb
        variables/
            variables.data-00000-of-00001
            variables.index
    """
    version = os.listdir(model_dir)[0]
    if not version.isdigit():
        raise UserException(
            "No versions of servable default found under base path in model_dir. See docs.cortex.dev for how to properly package your TensorFlow model"
        )

    if "saved_model.pb" not in os.listdir(os.path.join(model_dir, version)):
        raise UserException(
            'Expected packaged model to have a "saved_model.pb" file. See docs.cortex.dev for how to properly package your TensorFlow model'
        )
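
A directory tree that satisfies the check above can be built with the standard library; a minimal sketch (the version number and file names follow the docstring and are illustrative):

import os
import tempfile

model_dir = tempfile.mkdtemp(prefix="model-")
version_dir = os.path.join(model_dir, "1562353043")

# Recreate the layout from the docstring: <version>/saved_model.pb plus a variables/ directory.
os.makedirs(os.path.join(version_dir, "variables"))
open(os.path.join(version_dir, "saved_model.pb"), "w").close()
open(os.path.join(version_dir, "variables", "variables.index"), "w").close()
open(os.path.join(version_dir, "variables", "variables.data-00000-of-00001"), "w").close()

# validate_model_dir(model_dir) would accept this tree.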
Example #14
    def model_config(self, model_name):
        model = self.models[model_name]
        if model is None:
            return None
        estimator = self.estimators[model["estimator"]]
        target_column = self.columns[util.get_resource_ref(model["target_column"])]

        if estimator.get("target_column") is not None:
            target_col_type = self.get_inferred_column_type(target_column["name"])
            if target_col_type not in estimator["target_column"]:
                raise UserException(
                    "model " + model_name,
                    "target_column",
                    target_column["name"],
                    "unsupported type (expected type {}, got type {})".format(
                        util.data_type_str(estimator["target_column"]),
                        util.data_type_str(target_col_type),
                    ),
                )

        model_config = deepcopy(model)
        config_keys = [
            "name",
            "estimator",
            "estimator_path",
            "target_column",
            "input",
            "training_input",
            "hparams",
            "prediction_key",
            "data_partition_ratio",
            "training",
            "evaluation",
            "tags",
        ]
        util.keep_dict_keys(model_config, config_keys)

        model_config["target_column"] = target_column["name"]
        model_config["input"] = self.populate_values(
            model["input"], estimator["input"], preserve_column_refs=False
        )
        if model.get("training_input") is not None:
            model_config["training_input"] = self.populate_values(
                model["training_input"], estimator["training_input"], preserve_column_refs=False
            )
        if model.get("hparams") is not None:
            model_config["hparams"] = self.populate_values(
                model["hparams"], estimator["hparams"], preserve_column_refs=False
            )

        return model_config
Example #15
        def _transform_and_validate(*values):
            result = _transform(*values)
            if not util.validate_cortex_type(result, column_type):
                raise UserException(
                    "transformed column " + column_name,
                    "tranformer " + transformed_column["transformer"],
                    "incorrect return value type: expected {}, got {}.".format(
                        " or ".join(CORTEX_TYPE_TO_ACCEPTABLE_PYTHON_TYPE_STRS[
                            column_type]),
                        util.user_obj_str(result),
                    ),
                )

            return result
Example #16
    def load_module(self, module_prefix, module_name, impl_key):
        full_module_name = "{}_{}".format(module_prefix, module_name)

        try:
            impl_path = self.download_python_file(impl_key, full_module_name)
        except CortexException as e:
            e.wrap("unable to find python file " + module_name)
            raise

        try:
            impl = imp.load_source(full_module_name, impl_path)
        except Exception as e:
            raise UserException("unable to load python module " + module_name) from e

        return impl, impl_path
Example #17
def read_parquet(ctx, spark):
    parquet_config = ctx.environment["data"]
    df = spark.read.parquet(parquet_config["path"])

    parquet_columns = [c["column_name"] for c in parquet_config["schema"]]
    missing_cols = util.subtract_lists(parquet_columns, df.columns)
    if len(missing_cols) > 0:
        raise UserException("parquet dataset",
                            "missing columns: " + str(missing_cols))

    selectExprs = [
        "{} as {}".format(c["column_name"], c["feature_name"])
        for c in parquet_config["schema"]
    ]

    return df.selectExpr(*selectExprs)
Example #18
def ingest(ctx, spark):
    expected_schema = expected_schema_from_context(ctx)

    if ctx.environment["data"]["type"] == "csv":
        df = read_csv(ctx, spark)
    elif ctx.environment["data"]["type"] == "parquet":
        df = read_parquet(ctx, spark)

    if compare_column_schemas(expected_schema, df.schema) is not True:
        logger.error("expected schema:")
        log_df_schema(spark.createDataFrame([], expected_schema), logger.error)
        logger.error("found schema:")
        log_df_schema(df, logger.error)

        raise UserException("raw data schema mismatch")

    return df
Example #19
def cast_compound_type(value, type_str):
    allowed_types = type_str.split("|")
    if consts.VALUE_TYPE_INT in allowed_types:
        if util.is_int(value):
            return value
    if consts.VALUE_TYPE_FLOAT in allowed_types:
        if util.is_int(value):
            return float(value)
        if util.is_float(value):
            return value
    if consts.VALUE_TYPE_STRING in allowed_types:
        if util.is_str(value):
            return value
    if consts.VALUE_TYPE_BOOL in allowed_types:
        if util.is_bool(value):
            return value

    raise UserException(
        "unsupported input type (expected type {}, got {})".format(
            util.data_type_str(type_str), util.user_obj_str(value)
        )
    )
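
The compound type strings handled above look like "INT|FLOAT"; a standalone sketch of the same casting rules, assuming the consts.VALUE_TYPE_* constants are the plain strings "INT", "FLOAT", "STRING", and "BOOL":

def cast_compound(value, type_str):
    allowed = type_str.split("|")
    # bool is a subclass of int in Python, so it is checked separately first
    if "BOOL" in allowed and isinstance(value, bool):
        return value
    if "INT" in allowed and isinstance(value, int) and not isinstance(value, bool):
        return value
    if "FLOAT" in allowed:
        if isinstance(value, int) and not isinstance(value, bool):
            return float(value)  # an int is promoted only when INT is not allowed
        if isinstance(value, float):
            return value
    if "STRING" in allowed and isinstance(value, str):
        return value
    raise ValueError("value {!r} does not match type {}".format(value, type_str))

print(cast_compound(3, "INT|FLOAT"))  # 3 (kept as an int)
print(cast_compound(3, "FLOAT"))      # 3.0 (promoted)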
Example #20
def transform_column(column_name, df, ctx, spark):
    if not ctx.is_transformed_column(column_name):
        return df
    if column_name in df.columns:
        return df
    transformed_column = ctx.transformed_columns[column_name]

    trans_impl, trans_impl_path = ctx.get_transformer_impl(column_name)
    if hasattr(trans_impl, "transform_spark"):
        return execute_transform_spark(column_name, df, ctx, spark).withColumn(
            column_name,
            F.col(column_name).cast(CORTEX_TYPE_TO_SPARK_TYPE[
                ctx.transformed_columns[column_name]["type"]]),
        )
    elif hasattr(trans_impl, "transform_python"):
        return execute_transform_python(column_name, df, ctx, spark)
    else:
        raise UserException(
            "transformed column " + column_name,
            "transformer " + transformed_column["transformer"],
            "transform_spark(), transform_python(), or both must be defined",
        )
Example #21
def read_parquet(ctx, spark):
    parquet_config = ctx.environment["data"]
    df = spark.read.parquet(parquet_config["path"])

    alias_map = {}
    for parquet_col_config in parquet_config["schema"]:
        col_name = util.get_resource_ref(parquet_col_config["raw_column"])
        if col_name in ctx.raw_columns:
            alias_map[col_name] = parquet_col_config["parquet_column_name"]

    missing_cols = set(alias_map.keys()) - set(df.columns)
    if len(missing_cols) > 0:
        logger.error("found schema:")
        log_df_schema(df, logger.error)
        raise UserException("missing column(s) in input dataset",
                            str(missing_cols))

    selectExprs = [
        "{} as {}".format(parq_name, col_name)
        for col_name, parq_name in alias_map.items()
    ]

    return df.selectExpr(*selectExprs)
Example #22
def read_parquet(ctx, spark):
    parquet_config = ctx.environment["data"]
    df = spark.read.parquet(parquet_config["path"])

    alias_map = {
        c["parquet_column_name"]: c["raw_column_name"]
        for c in parquet_config["schema"]
        if c["parquet_column_name"] in ctx.raw_columns
    }

    missing_cols = set(alias_map.keys()) - set(df.columns)
    if len(missing_cols) > 0:
        logger.error("found schema:")
        log_df_schema(df, logger.error)
        raise UserException("missing column(s) in input dataset",
                            str(missing_cols))

    selectExprs = [
        "{} as {}".format(alias_map[alias], alias)
        for alias in alias_map.keys()
    ]

    return df.selectExpr(*selectExprs)
Example #23
def read_csv(ctx, spark):
    data_config = ctx.environment["data"]

    csv_config = {
        util.snake_to_camel(param_name): val
        for param_name, val in data_config.get("csv_config", {}).items()
        if val is not None
    }

    df = spark.read.csv(data_config["path"],
                        inferSchema=True,
                        mode="FAILFAST",
                        **csv_config)
    if len(data_config["schema"]) != len(df.columns):
        raise UserException("expected " + len(data_config["schema"]) +
                            " column(s) but got " + len(df.columns))

    col_names = [
        util.get_resource_ref(col_ref) for col_ref in data_config["schema"]
    ]
    renamed_cols = [
        F.col(c).alias(col_names[idx]) for idx, c in enumerate(df.columns)
    ]
    return df.select(*renamed_cols)
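
The util.snake_to_camel call above maps the user-facing csv_config keys onto Spark's camelCase CSV reader options; a hypothetical standalone version of that conversion (the exact helper in util is assumed):

def snake_to_camel(name):
    # "null_value" -> "nullValue", "header" stays "header"
    head, *rest = name.split("_")
    return head + "".join(part.capitalize() for part in rest)

print(snake_to_camel("null_value"))                   # nullValue
print(snake_to_camel("ignore_leading_white_space"))   # ignoreLeadingWhiteSpace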
Example #24
def transform_column(column_name, df, ctx, spark):
    if not ctx.is_transformed_column(column_name):
        return df
    if column_name in df.columns:
        return df

    transformed_column = ctx.transformed_columns[column_name]
    trans_impl, _ = ctx.get_transformer_impl(column_name)

    if hasattr(trans_impl, "transform_spark"):
        try:
            df = execute_transform_spark(column_name, df, ctx, spark)
            return df.withColumn(
                column_name,
                F.col(column_name).cast(CORTEX_TYPE_TO_SPARK_TYPE[
                    ctx.get_inferred_column_type(column_name)]),
            )
        except CortexException as e:
            raise UserRuntimeException(
                "transformed column " + column_name,
                transformed_column["transformer"] + ".transform_spark",
            ) from e
    elif hasattr(trans_impl, "transform_python"):
        try:
            return execute_transform_python(column_name, df, ctx, spark)
        except Exception as e:
            raise UserRuntimeException(
                "transformed column " + column_name,
                transformed_column["transformer"] + ".transform_python",
            ) from e
    else:
        raise UserException(
            "transformed column " + column_name,
            "transformer " + transformed_column["transformer"],
            "transform_spark(), transform_python(), or both must be defined",
        )
Example #25
def ingest_raw_dataset(spark, ctx, features_to_validate, should_ingest):
    if should_ingest:
        features_to_validate = list(ctx.rf_id_map.keys())

    if len(features_to_validate) > 0:
        feature_resources_to_validate = [
            ctx.rf_id_map[f] for f in features_to_validate
        ]
        ctx.upload_resource_status_start(*feature_resources_to_validate)
        try:
            if should_ingest:
                logger.info("Ingesting")
                logger.info("Ingesting {} data from {}".format(
                    ctx.app["name"], ctx.environment["data"]["path"]))
                ingest_df = spark_util.ingest(ctx, spark)
                full_dataset_counter = ingest_df.count()
                if ctx.environment["data"].get("drop_null"):
                    ingest_df = ingest_df.dropna()
                    logger.info("Dropping any rows that contain null values")
                    write_dataset_counter = ingest_df.count()

                logger.info("Caching {} data (version: {})".format(
                    ctx.app["name"], ctx.dataset_version))
                spark_util.write_raw_dataset(ingest_df, ctx)

                if ctx.environment["data"].get("drop_null"):
                    logger.info(
                        "{} rows read, {} rows dropped, {} rows ingested".
                        format(
                            full_dataset_counter,
                            full_dataset_counter - write_dataset_counter,
                            write_dataset_counter,
                        ))
                else:
                    logger.info(
                        "{} rows ingested".format(full_dataset_counter))
            logger.info("Reading {} data (version: {})".format(
                ctx.app["name"], ctx.dataset_version))
            raw_df = spark_util.read_raw_dataset(ctx, spark)
            total_row_count = raw_df.count()
            conditions_dict = spark_util.value_check_data(
                ctx, raw_df, features_to_validate)

            if len(conditions_dict) > 0:
                for column, cond_count_list in conditions_dict.items():
                    for condition, fail_count in cond_count_list:
                        logger.error(
                            "Data validation {} has been violated in {}/{} samples"
                            .format(condition, fail_count, total_row_count))
                raise UserException("raw feature validations failed")
        except:
            ctx.upload_resource_status_failed(*feature_resources_to_validate)
            raise
        ctx.upload_resource_status_success(*feature_resources_to_validate)
        logger.info("First {} samples:".format(3))
        show_df(raw_df, ctx, 3)
    else:
        logger.info("Reading {} data (version: {})".format(
            ctx.app["name"], ctx.dataset_version))
        raw_df = spark_util.read_raw_dataset(ctx, spark)
        spark_util.value_check_data(ctx, raw_df, features_to_validate)

    return raw_df
Example #26
def ingest(ctx, spark):
    fileType = ctx.environment["data"]["type"]
    if fileType == "csv":
        df = read_csv(ctx, spark)
    elif fileType == "parquet":
        df = read_parquet(ctx, spark)

    input_type_map = {f.name: f.dataType for f in df.schema}
    for raw_column_name in ctx.raw_columns:
        raw_column = ctx.raw_columns[raw_column_name]
        expected_cortex_type = raw_column["type"]
        actual_spark_type = input_type_map[raw_column_name]

        if expected_cortex_type == consts.COLUMN_TYPE_INFERRED:
            if actual_spark_type not in SPARK_TYPE_TO_CORTEX_TYPE:
                df = df.withColumn(raw_column_name,
                                   F.col(raw_column_name).cast(StringType()))
            else:
                actual_cortex_type = SPARK_TYPE_TO_CORTEX_TYPE[
                    actual_spark_type]
                expected_spark_type = CORTEX_TYPE_TO_SPARK_TYPE[
                    actual_cortex_type]
                if actual_spark_type != expected_spark_type:
                    df = df.withColumn(
                        raw_column_name,
                        F.col(raw_column_name).cast(expected_spark_type))
        else:
            expected_spark_type = CORTEX_TYPE_TO_SPARK_TYPE[
                expected_cortex_type]
            if actual_spark_type in SPARK_TYPE_TO_CORTEX_TYPE:
                expected_types = CORTEX_TYPE_TO_CASTABLE_SPARK_TYPES[fileType][
                    expected_cortex_type]
                if actual_spark_type not in expected_types:
                    logger.error("found schema:")
                    log_df_schema(df, logger.error)

                    raise UserException(
                        "raw column " + raw_column_name,
                        "type mismatch",
                        "expected {} but got {}".format(
                            " or ".join(str(x) for x in expected_types),
                            actual_spark_type),
                    )
                if actual_spark_type != expected_spark_type:
                    df = df.withColumn(
                        raw_column_name,
                        F.col(raw_column_name).cast(expected_spark_type))
            else:
                try:
                    df = df.withColumn(
                        raw_column_name,
                        F.col(raw_column_name).cast(expected_spark_type))
                except Exception as e:
                    raise UserException(
                        "tried casting " + raw_column_name,
                        "from ingested type " + actual_spark_type,
                        "to expected type " + expected_spark_type,
                        "but got exception: " + e,
                    )

    return df.select(*sorted(df.columns))
Example #27
def validate_transformer(column_name, df, ctx, spark):
    transformed_column = ctx.transformed_columns[column_name]

    trans_impl, _ = ctx.get_transformer_impl(column_name)

    if hasattr(trans_impl, "transform_python"):
        try:
            transform_python_collect = execute_transform_python(
                column_name, df, ctx, spark, validate=True).collect()
        except Exception as e:
            raise UserRuntimeException(
                "transformed column " + column_name,
                transformed_column["transformer"] + ".transform_python",
            ) from e

    if hasattr(trans_impl, "transform_spark"):

        try:
            transform_spark_df = execute_transform_spark(
                column_name, df, ctx, spark)

            # check that the return object is a dataframe
            if type(transform_spark_df) is not DataFrame:
                raise UserException(
                    "expected pyspark.sql.dataframe.DataFrame but found type {}"
                    .format(type(transform_spark_df)))

            # check that a column is added with the expected name
            if column_name not in transform_spark_df.columns:
                logger.error("schema of output dataframe:")
                log_df_schema(transform_spark_df, logger.error)

                raise UserException(
                    "output dataframe after running transformer does not have column {}"
                    .format(column_name))

            # check that the transformer runs on the data
            try:
                transform_spark_df.select(column_name).collect()
            except Exception as e:
                raise UserRuntimeException("function transform_spark") from e

            actual_structfield = transform_spark_df.select(
                column_name).schema.fields[0]

            # check that expected output column has the correct data type
            if (actual_structfield.dataType
                    not in CORTEX_TYPE_TO_ACCEPTABLE_SPARK_TYPES[
                        transformed_column["type"]]):
                raise UserException(
                    "incorrect column type, expected {}, found {}.".format(
                        " or ".join(
                            str(t)
                            for t in CORTEX_TYPE_TO_ACCEPTABLE_SPARK_TYPES[
                                transformed_column["type"]]),
                        actual_structfield.dataType,
                    ))

            # perform the necessary upcast/downcast for the column, e.g. INT -> LONG or DOUBLE -> FLOAT
            transform_spark_df = transform_spark_df.withColumn(
                column_name,
                F.col(column_name).cast(CORTEX_TYPE_TO_SPARK_TYPE[
                    ctx.transformed_columns[column_name]["type"]]),
            )

            # check that the function doesn't modify the schema of the other columns in the input dataframe
            if set(transform_spark_df.columns) - set([column_name]) != set(
                    df.columns):
                logger.error("expected schema:")

                log_df_schema(df, logger.error)

                logger.error(
                    "found schema (with {} dropped):".format(column_name))
                log_df_schema(transform_spark_df.drop(column_name),
                              logger.error)

                raise UserException(
                    "a column besides {} was modifed in the output dataframe".
                    format(column_name))
        except CortexException as e:
            e.wrap(
                "transformed column " + column_name,
                transformed_column["transformer"] + ".transform_spark",
            )
            raise

        if hasattr(trans_impl, "transform_spark") and hasattr(
                trans_impl, "transform_python"):
            name_type_map = [(s.name, s.dataType)
                             for s in transform_spark_df.schema]
            transform_spark_collect = transform_spark_df.collect()

            for tp_row, ts_row in zip(transform_python_collect,
                                      transform_spark_collect):
                tp_dict = tp_row.asDict()
                ts_dict = ts_row.asDict()

                for name, dataType in name_type_map:
                    if tp_dict[name] == ts_dict[name]:
                        continue
                    elif dataType == FloatType() and util.isclose(
                            tp_dict[name], ts_dict[name], FLOAT_PRECISION):
                        continue
                    raise UserException(
                        column_name,
                        "{0}.transform_spark and {0}.transform_python had differing values"
                        .format(transformed_column["transformer"]),
                        "{} != {}".format(ts_row, tp_row),
                    )
Example #28
def validate_transformer(column_name, test_df, ctx, spark):
    transformed_column = ctx.transformed_columns[column_name]
    transformer = ctx.transformers[transformed_column["transformer"]]
    trans_impl, _ = ctx.get_transformer_impl(column_name)

    inferred_python_type = None
    inferred_spark_type = None

    if hasattr(trans_impl, "transform_python"):
        try:
            if transformer["output_type"] == consts.COLUMN_TYPE_INFERRED:
                sample_df = test_df.collect()
                sample = sample_df[0]
                try:
                    input = ctx.populate_values(transformed_column["input"],
                                                transformer["input"],
                                                preserve_column_refs=True)
                except CortexException as e:
                    e.wrap("input")
                    raise
                transformer_input = create_transformer_inputs_from_map(
                    input, sample)
                initial_transformed_value = trans_impl.transform_python(
                    transformer_input)
                inferred_python_type = infer_python_type(
                    initial_transformed_value)

                for row in sample_df:
                    transformer_input = create_transformer_inputs_from_map(
                        input, row)
                    transformed_value = trans_impl.transform_python(
                        transformer_input)
                    if inferred_python_type != infer_python_type(
                            transformed_value):
                        raise UserException(
                            "transformed column " + column_name,
                            "type inference failed, mixed data types in dataframe.",
                            'expected type of "' + transformed_sample +
                            '" to be ' + inferred_python_type,
                        )

                ctx.write_metadata(transformed_column["id"],
                                   {"type": inferred_python_type})

            transform_python_collect = execute_transform_python(
                column_name, test_df, ctx, spark, validate=True).collect()
        except Exception as e:
            raise UserRuntimeException(
                "transformed column " + column_name,
                transformed_column["transformer"] + ".transform_python",
            ) from e

    if hasattr(trans_impl, "transform_spark"):
        try:
            transform_spark_df = execute_transform_spark(
                column_name, test_df, ctx, spark)

            # check that the return object is a dataframe
            if type(transform_spark_df) is not DataFrame:
                raise UserException(
                    "expected pyspark.sql.dataframe.DataFrame but got type {}".
                    format(type(transform_spark_df)))

            # check that a column is added with the expected name
            if column_name not in transform_spark_df.columns:
                logger.error("schema of output dataframe:")
                log_df_schema(transform_spark_df, logger.error)

                raise UserException(
                    "output dataframe after running transformer does not have column {}"
                    .format(column_name))

            if transformer["output_type"] == consts.COLUMN_TYPE_INFERRED:
                inferred_spark_type = SPARK_TYPE_TO_CORTEX_TYPE[
                    transform_spark_df.select(column_name).schema[0].dataType]
                ctx.write_metadata(transformed_column["id"],
                                   {"type": inferred_spark_type})

            # check that the transformer runs on the data
            try:
                transform_spark_df.select(column_name).collect()
            except Exception as e:
                raise UserRuntimeException("function transform_spark") from e

            # check that expected output column has the correct data type
            if transformer["output_type"] != consts.COLUMN_TYPE_INFERRED:
                actual_structfield = transform_spark_df.select(
                    column_name).schema.fields[0]
                if (actual_structfield.dataType
                        not in CORTEX_TYPE_TO_ACCEPTABLE_SPARK_TYPES[
                            transformer["output_type"]]):
                    raise UserException(
                        "incorrect column type: expected {}, got {}.".format(
                            " or ".join(
                                str(t)
                                for t in CORTEX_TYPE_TO_ACCEPTABLE_SPARK_TYPES[
                                    transformer["output_type"]]),
                            actual_structfield.dataType,
                        ))

            # perform the necessary casting for the column
            transform_spark_df = transform_spark_df.withColumn(
                column_name,
                F.col(column_name).cast(CORTEX_TYPE_TO_SPARK_TYPE[
                    ctx.get_inferred_column_type(column_name)]),
            )

            # check that the function doesn't modify the schema of the other columns in the input dataframe
            if set(transform_spark_df.columns) - set([column_name]) != set(
                    test_df.columns):
                logger.error("expected schema:")

                log_df_schema(test_df, logger.error)

                logger.error(
                    "found schema (with {} dropped):".format(column_name))
                log_df_schema(transform_spark_df.drop(column_name),
                              logger.error)

                raise UserException(
                    "a column besides {} was modifed in the output dataframe".
                    format(column_name))
        except CortexException as e:
            raise UserRuntimeException(
                "transformed column " + column_name,
                transformed_column["transformer"] + ".transform_spark",
            ) from e

    if hasattr(trans_impl, "transform_spark") and hasattr(
            trans_impl, "transform_python"):
        if (transformer["output_type"] == consts.COLUMN_TYPE_INFERRED
                and inferred_spark_type != inferred_python_type):
            raise UserException(
                "transformed column " + column_name,
                "type inference failed, transform_spark and transform_python had differing types.",
                "transform_python: " + inferred_python_type,
                "transform_spark: " + inferred_spark_type,
            )

        name_type_map = [(s.name, s.dataType)
                         for s in transform_spark_df.schema]
        transform_spark_collect = transform_spark_df.collect()

        for tp_row, ts_row in zip(transform_python_collect,
                                  transform_spark_collect):
            tp_dict = tp_row.asDict()
            ts_dict = ts_row.asDict()

            for name, dataType in name_type_map:
                if tp_dict[name] == ts_dict[name]:
                    continue
                elif dataType == FloatType() and util.isclose(
                        tp_dict[name], ts_dict[name], FLOAT_PRECISION):
                    continue
                raise UserException(
                    column_name,
                    "{0}.transform_spark and {0}.transform_python had differing values"
                    .format(transformed_column["transformer"]),
                    "{} != {}".format(ts_row, tp_row),
                )
Example #29
    def populate_values(self, input, input_schema, preserve_column_refs):
        if input is None:
            if input_schema is None:
                return None
            if input_schema.get("_allow_null") == True:
                return None
            raise UserException("Null value is not allowed")

        if util.is_resource_ref(input):
            res_name = util.get_resource_ref(input)
            if res_name in self.constants:
                if self.constants[res_name].get("value") is not None:
                    const_val = self.constants[res_name]["value"]
                elif self.constants[res_name].get("path") is not None:
                    const_val = self.storage.get_json_external(self.constants[res_name]["path"])
                try:
                    return self.populate_values(const_val, input_schema, preserve_column_refs)
                except CortexException as e:
                    e.wrap("constant " + res_name)
                    raise

            if res_name in self.aggregates:
                agg_val = self.get_obj(self.aggregates[res_name]["key"])
                try:
                    return self.populate_values(agg_val, input_schema, preserve_column_refs)
                except CortexException as e:
                    e.wrap("aggregate " + res_name)
                    raise

            if res_name in self.columns:
                if input_schema is not None:
                    col_type = self.get_inferred_column_type(res_name)
                    if col_type not in input_schema["_type"]:
                        raise UserException(
                            "column {}: unsupported input type (expected type {}, got type {})".format(
                                res_name,
                                util.data_type_str(input_schema["_type"]),
                                util.data_type_str(col_type),
                            )
                        )
                if preserve_column_refs:
                    return input
                else:
                    return res_name

        if util.is_list(input):
            elem_schema = None
            if input_schema is not None:
                if not util.is_list(input_schema["_type"]):
                    raise UserException(
                        "unsupported input type (expected type {}, got {})".format(
                            util.data_type_str(input_schema["_type"]), util.user_obj_str(input)
                        )
                    )
                elem_schema = input_schema["_type"][0]

                min_count = input_schema.get("_min_count")
                if min_count is not None and len(input) < min_count:
                    raise UserException(
                        "list has length {}, but the minimum allowed length is {}".format(
                            len(input), min_count
                        )
                    )

                max_count = input_schema.get("_max_count")
                if max_count is not None and len(input) > max_count:
                    raise UserException(
                        "list has length {}, but the maximum allowed length is {}".format(
                            len(input), max_count
                        )
                    )

            casted = []
            for i, elem in enumerate(input):
                try:
                    casted.append(self.populate_values(elem, elem_schema, preserve_column_refs))
                except CortexException as e:
                    e.wrap("index " + i)
                    raise
            return casted

        if util.is_dict(input):
            if input_schema is None:
                casted = {}
                for key, val in input.items():
                    key_casted = self.populate_values(key, None, preserve_column_refs)
                    try:
                        val_casted = self.populate_values(val, None, preserve_column_refs)
                    except CortexException as e:
                        e.wrap(util.user_obj_str(key))
                        raise
                    casted[key_casted] = val_casted
                return casted

            if not util.is_dict(input_schema["_type"]):
                raise UserException(
                    "unsupported input type (expected type {}, got {})".format(
                        util.data_type_str(input_schema["_type"]), util.user_obj_str(input)
                    )
                )

            min_count = input_schema.get("_min_count")
            if min_count is not None and len(input) < min_count:
                raise UserException(
                    "map has length {}, but the minimum allowed length is {}".format(
                        len(input), min_count
                    )
                )

            max_count = input_schema.get("_max_count")
            if max_count is not None and len(input) > max_count:
                raise UserException(
                    "map has length {}, but the maximum allowed length is {}".format(
                        len(input), max_count
                    )
                )

            is_generic_map = False
            if len(input_schema["_type"]) == 1:
                input_type_key = next(iter(input_schema["_type"].keys()))
                if is_compound_type(input_type_key):
                    is_generic_map = True
                    generic_map_key_schema = input_schema_from_type_schema(input_type_key)
                    generic_map_value = input_schema["_type"][input_type_key]

            if is_generic_map:
                casted = {}
                for key, val in input.items():
                    key_casted = self.populate_values(
                        key, generic_map_key_schema, preserve_column_refs
                    )
                    try:
                        val_casted = self.populate_values(
                            val, generic_map_value, preserve_column_refs
                        )
                    except CortexException as e:
                        e.wrap(util.user_obj_str(key))
                        raise
                    casted[key_casted] = val_casted
                return casted

            # fixed map
            casted = {}
            for key, val_schema in input_schema["_type"].items():
                if key in input:
                    val = input[key]
                else:
                    if val_schema.get("_optional") is not True:
                        raise UserException("missing key: " + util.user_obj_str(key))
                    if val_schema.get("_default") is None:
                        continue
                    val = val_schema["_default"]

                try:
                    val_casted = self.populate_values(val, val_schema, preserve_column_refs)
                except CortexException as e:
                    e.wrap(util.user_obj_str(key))
                    raise
                casted[key] = val_casted
            return casted

        if input_schema is None:
            return input
        if not util.is_str(input_schema["_type"]):
            raise UserException(
                "unsupported input type (expected type {}, got {})".format(
                    util.data_type_str(input_schema["_type"]), util.user_obj_str(input)
                )
            )
        return cast_compound_type(input, input_schema["_type"])
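
The input_schema dictionaries consumed above use _type, _optional, _default, _min_count, and _max_count keys; a small illustrative fixed-map schema and a matching input, with the concrete field names assumed for the example:

# Assumed shape of a fixed-map input schema, based on the keys read above.
input_schema = {
    "_type": {
        "alpha": {"_type": "FLOAT"},                       # required float
        "labels": {"_type": ["STRING"], "_min_count": 1},  # non-empty list of strings
        "verbose": {"_type": "BOOL", "_optional": True, "_default": False},
    }
}

# An input that populate_values() would accept against this schema;
# "verbose" may be omitted because it is optional and has a default.
input_value = {"alpha": 0.1, "labels": ["a", "b"]}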
Example #30
    def get_test_case_tags(conn, ts_id, testtype, rerun=None, type='failed'):
        """
        param: conn: jira connection object
        param: ts_id: test suite id
        """
        globfilepath = os.path.join(os.path.expanduser('~'),
                                    "global_conf.yaml")
        globdata = get_data_from_yaml(globfilepath)
        enable_test_mgt = True
        if 'EnableTestManagement' not in globdata or globdata.get(
                'EnableTestManagement', 'no').lower() == "no":
            enable_test_mgt = False
        # fetching test cases from given master test suite.
        if ',' in ts_id:
            test_cases = []
            ts_id = ts_id.split(',')
            for parent in ts_id:
                cases = conn.search_issues('parent=' + parent,
                                           startAt=0,
                                           maxResults=100)
                cases_b = conn.search_issues('parent=' + parent,
                                             startAt=100,
                                             maxResults=100)
                cases.extend(cases_b)
                test_cases.extend(cases)
        else:
            test_cases = []
            test_cases = conn.search_issues('parent=' + ts_id,
                                            startAt=0,
                                            maxResults=100)
            test_cases_b = conn.search_issues('parent=' + ts_id,
                                              startAt=100,
                                              maxResults=100)
            test_cases.extend(test_cases_b)
        if not test_cases:
            raise Exception(
                "No Testcases found to clone for the given Suite {}".format(
                    ts_id))
        fieldmap = {field['name']: field['id'] for field in conn.fields()}
        os.environ.__dict__['fieldmap'] = {}
        os.environ.__dict__['fieldmap'].update(fieldmap)
        # sutas_id and Automated are custom fields added for the sutas framework,
        # so we don't get these fields directly with the issue object.
        sutas_id_field = fieldmap['sutas_id']
        try:
            auto_field = fieldmap['Automated?']
        except KeyError:
            auto_field = fieldmap['Automated']
        not_planned = '\n Below mentioned test cases are in "not planned" ' \
            'state, hence not running them.\n'
        not_automated = "\n Below mentioned test cases have Automated field " \
            "set to 'No', hence not running them.\n"
        np_summ = []
        na_summ = []
        np_id = []
        na_id = []
        tc_list = []
        tc_data = {}
        to_be_cloned_test_cases = []
        np_slack_str = ''
        na_slack_str = ''
        # check whether the Automated and sutas_id fields exist
        if auto_field and sutas_id_field:
            # test cases marked as "not planned" or not automatable are ignored
            for test_case in test_cases:
                if test_case.fields().status.name.lower() == 'not planned':
                    np_summ.append(test_case.fields().summary.encode('utf-8'))
                    np_id.append(test_case.key)
                elif test_case.raw['fields'][auto_field] is None:
                    na_summ.append(test_case.fields().summary.encode('utf-8'))
                    na_id.append(test_case.key)
                elif test_case.fields().status.name.lower() != 'not planned' and \
                     test_case.raw['fields'][auto_field][0]['value'].lower() == 'yes':
                    if test_case.raw['fields'][sutas_id_field]:
                        tc_list.append(test_case.raw['fields'][sutas_id_field])
                        to_be_cloned_test_cases.append(test_case)
                # sending not planned and not automatable testcase lists as notifications through slack
                np_slack_str = TestManagement._format_slack_str(
                    test_case, np_summ, np_id)
                na_slack_str = TestManagement._format_slack_str(
                    test_case, na_summ, na_id)
            if np_slack_str:
                notify.message(not_planned)
                notify.message(np_slack_str)
            if na_slack_str:
                logger.info(not_automated)
                logger.info(na_slack_str)
                #notify.message(not_automated)
                #notify.message(na_slack_str)
            if tc_list:
                logger.info(
                    "Testcases which are automated and updated with sutas_id in jira {}"
                    .format(tc_list))
                # It filters test cases based on the configuration provided by the user
                # in the user_configuration file for test_type and sprint_no
                robosuitepath = os.environ['robosuitepath']
                suite = TestData(parent=None, source=robosuitepath)
                robotctagids = []
                robotcnames = []
                for testcase in suite.testcase_table:
                    robotcnames.append(testcase.name.lower())
                    robotctagids.append(testcase.tags.value)
                robotcdict = dict(list(zip(robotcnames, robotctagids)))
                if '-t' in sys.argv:
                    to_be_cloned_test_cases = []
                    tcnames = os.environ['testnames'].lower().split(',')
                    for tc in tcnames:
                        if tc in robotcnames:
                            for jira_tc in tc_list:
                                if jira_tc in robotcdict[tc]:
                                    tcid = jira_tc
                                    to_be_cloned_test_cases.append(
                                        conn.issue(tcid))
                        else:
                            raise Exception(
                                "Robot file doesn't contain the mentioned tc {}"
                                .format(tc))
                else:
                    to_be_cloned_test_cases = TestManagement.filter_tcs(
                        conn, to_be_cloned_test_cases, testtype)
                if not to_be_cloned_test_cases:
                    raise Exception(
                        "Not found any testcase to clone for the given suite with given configuration"
                    )
                final_tcs = []
                for test_case in to_be_cloned_test_cases:
                    if not rerun:
                        for robotctagid in robotctagids:
                            if robotctagid:
                                if test_case.raw['fields'][
                                        sutas_id_field] in robotctagid:
                                    final_tcs.append(test_case.raw['fields']
                                                     [sutas_id_field])
                                    continue
                    else:
                        if test_case.fields().status.name.lower(
                        ) == 'failed' and type == 'failed':
                            final_tcs.append(
                                test_case.raw['fields'][sutas_id_field])
                        elif test_case.fields().status.name.lower(
                        ) == 'skipped' and type == 'skipped':
                            final_tcs.append(
                                test_case.raw['fields'][sutas_id_field])
                tc_list = list(set(final_tcs))
                tags = 'OR'.join(tc_list)
                logger.info(tags)
            else:
                raise UserException("Make sure fields 'sutas_id' and " \
                            "'Automated' must be there in every test case")
        if not tags:
            raise UserException(
                "No test cases found in provided test suite : ", ts_id)
        issue_obj = None
        if isinstance(ts_id, list):
            os.environ['multiplesuiteids'] = ','.join(ts_id)
            iss_obj = conn.issue(ts_id[-1])
        else:
            iss_obj = conn.issue(ts_id)
        # check whether test management is enabled in the configuration files
        if enable_test_mgt:
            if iss_obj:
                if not rerun:
                    # cloning the test suite and its test cases from the master test suite
                    clone_testcases = [conn.issue(tc) for tc in tc_list]
                    issue_obj = TestManagement.clone_test_suite(\
                        conn, iss_obj, clone_testcases, testtype)
                    logger.warn("cloned test suite id : " + issue_obj.key)
                    notify.message("cloned test suite id : " + issue_obj.key)
                else:
                    issue_obj = iss_obj
                    logger.warn("Rerunning test suite of id : " +
                                issue_obj.key)
                    notify.message("Rerunning test suite of id : " +
                                   issue_obj.key)
                ts_id = issue_obj.id
                # fetching test cases from cloned test suite.
                test_cases = conn.search_issues('parent=' + ts_id)
                for test_case in test_cases:
                    tc_data[test_case.raw['fields'][sutas_id_field]] = \
                        (test_case.key, test_case.fields().summary)
                    if issue_obj.fields().status.name.upper() in [
                            'TODO', 'TO DO'
                    ] and not rerun:
                        for test_case in test_cases:
                            if test_case.raw['fields'][sutas_id_field]:
                                status = test_case.fields().status.name.lower()
                                if status in ['test in progress', 'running',
                                              'passed', 'failed', 'skipped', 'blocked']:
                                    raise Exception(
                                        "Test case already executed. Before "
                                        "executing a test case, make sure its "
                                        "status is todo or ready to run."
                                    )
                            else:
                                raise Exception(
                                    "Make sure the 'sutas_id' field is updated "
                                    "with the test case ID")
                            transitions = conn.transitions(ts_id)
                            #moving test suite to running state
                            for transition in transitions:
                                if transition['name'].lower() in [
                                        'run', 'suite in progress'
                                ]:
                                    conn.transition_issue(
                                        ts_id, str(transition['id']))

                    elif issue_obj.fields().status.name.lower() in [
                            'running', 'suite in progress'
                    ]:
                        if not rerun:
                            raise Exception(
                                "Test suite won't run because it is in Running state. "
                                "If you want to run the test suite, clone the master "
                                "suite and provide the cloned test suite id"
                            )
                    elif issue_obj.fields().status.name.lower() in [
                            'done', 'completed'
                    ]:
                        if not rerun:
                            raise Exception(
                                "Test suite won't run because it is in Completed state. "
                                "If you want to run the test suite, clone the master "
                                "suite and provide the cloned test suite id")
                    os.environ.__dict__["testids"] = {}
                    os.environ.__dict__["testids"].update(tc_data)
            else:
                raise Exception(
                    "No test suite found with provided test suite id :", ts_id)
        else:
            issue_obj = iss_obj
        return tags, issue_obj