Example #1
    def full_run_kfold_from_df(self, train_df, method='kfold'):
        """
        Given a training df, will run kfold tests and output all metrics.
        """
        self.check_sufficient_workspaces()
        folds = self.create_folds(method)
        self.create_kfold_WA(folds)

        available_flag = False

        while not available_flag:
            logger.info("Checking workspaces..")
            available_flag = self.check_workspaces_status()
            time.sleep(20)

        try:
            # results per utterance
            results_kfold = self.run_kfold_test(folds)

            # metrics per intent
            metrics = Metrics(self.threshold)
            classification_report = metrics.get_all_metrics_CV(
                results_kfold, fold_col='fold', detailed_results=False)

        finally:
            self.delete_kfold_workspaces()

        return results_kfold, classification_report
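
A minimal usage sketch for context: this assumes the method lives on the kfoldtest class constructed as in Example #9, and that the import path, credentials and threshold shown here are placeholders rather than the project's real values.

# Usage sketch -- constructor arguments mirror Example #9; the import path is an assumption.
from conversation_test.kfoldtest import kfoldtest

kf = kfoldtest(n_folds=5, apikey="YOUR_APIKEY", url="YOUR_URL",
               threshold=0.4, version="2018-09-20")
train_df = kf.intent_df_from_watson("YOUR_WORKSPACE_ID")  # pull training data from WA
results_kfold, classification_report = kf.full_run_kfold_from_df(train_df)
classification_report.to_csv("kfold_metrics.csv")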
Example #2
    def check_sufficient_workspaces(self):
        """
        counting the existing workspaces and check if there is space for k-workspaces 
        """
        response = self.assistant.list_workspaces().get_result()
        k_fold_number = self.n_folds
        max_workspaces = config.max_workspaces

        if len(response['workspaces']) + k_fold_number <= max_workspaces:
            logger.info("You have space to perform the k-fold test")
        else:
            remove = len(response['workspaces']) + k_fold_number - max_workspaces
            raise ValueError("The k-fold test would take you over the {} workspace limit. Make "
                             "sure to remove {} workspaces before creating the k-fold workspaces".format(max_workspaces, remove))
Example #3
    def delete_kfold_workspaces(self):
        """
        delete the workspaces when you dont need them anymore
        """

        workspaces = self.workspaces

        logger.info("Deleting temporary workspaces")

        for workspace_id in workspaces:
            self.assistant.delete_workspace(
                workspace_id=workspace_id).get_result()

        self.workspaces = []
Example #4
    def create_kfold_WA(self, folds):
        """
        create the k-fold workspaces in WA

        :param folds: are the folds created in the function `create_folds`
        :return workspaces: is a list of workspaces ID generated 
        """
        logger.info("Creating kfold workspaces..")

        for i, fold in enumerate(folds):
            train = fold["train"]
            intents = self.create_intents(train)
            workspace_id = self.create_workspace(intents, i)
            self.workspaces.append(workspace_id)

        return self.workspaces
Example #5
def create_workspace_from_df(assistant,
                             name,
                             train_df,
                             description="",
                             poll_interval=20):
    """
    Wraps create_intents and create_workspace_from_df, then returns the skill ID when it has been trained.
    """

    intent_json = create_intents(assistant, train_df)
    skill_id = create_workspace(assistant, name, intent_json, description)

    status = ""

    while status != 'Available':
        logger.info("Waiting for {} skill to finish training..".format(name))
        status = assistant.get_workspace(
            workspace_id=skill_id).get_result()['status']
        time.sleep(poll_interval)

    return skill_id
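
A hedged usage sketch for this helper, assuming an authenticated `AssistantV1` client from the current ibm-watson SDK (the original code may use an older SDK) and a training frame with one utterance and its intent per row; the exact column names are whatever `create_intents` expects.

# Usage sketch -- the SDK calls below follow the current ibm-watson package;
# the train_df column names are assumptions.
import pandas as pd
from ibm_watson import AssistantV1
from ibm_cloud_sdk_core.authenticators import IAMAuthenticator

assistant = AssistantV1(version="2021-06-14",
                        authenticator=IAMAuthenticator("YOUR_APIKEY"))
assistant.set_service_url("YOUR_URL")

train_df = pd.DataFrame({"utterance": ["hi there", "goodbye"],
                         "intent": ["greeting", "farewell"]})
skill_id = create_workspace_from_df(assistant, name="temp_skill",
                                    train_df=train_df,
                                    description="generated for testing")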
Example #6
    def run_kfold_test(self, folds):
        """
        run the k-fold test. It is going to take folds as input and it will send the test dataframes to the right
        workspaces. 

        :param folds: output list from the function `create_folds`
        :return test_results: is list of results (dataframes) for each fold.  
        """
        test_results = pd.DataFrame()
        for i in range(len(folds)):
            logger.info("Running test for fold {}".format(i+1))
            test_index = folds[i]['test']
            df_test = self.training_df.iloc[test_index]
            df_test_reindexed = df_test.reset_index()
            results = self.test_kfold(df_test_reindexed, i)
            results["fold"] = i+1
            test_results = test_results.append(results)

        test_results["intent_correct"] = np.where(
            (test_results["confidence1"] < self.threshold), "BELOW_THRESHOLD", test_results["intent1"])

        return test_results
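
The last step implements the thresholding rule: the top intent is kept only when `confidence1` meets the workspace threshold, otherwise the row is labelled BELOW_THRESHOLD. A toy illustration with invented values:

import numpy as np
import pandas as pd

toy = pd.DataFrame({"intent1": ["billing", "cancel"],
                    "confidence1": [0.82, 0.31]})
threshold = 0.4
toy["intent_correct"] = np.where(toy["confidence1"] < threshold,
                                 "BELOW_THRESHOLD", toy["intent1"])
# toy["intent_correct"] -> ["billing", "BELOW_THRESHOLD"]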
Example #7
def main(topic_list, conf_matrix, save_master_data):
    skill_list = process_list_argument(topic_list, val_type=str)
    master_skill_id = None  # so the cleanup in the except block works even if creation fails
    master_thresh = Credentials.calculate_workspace_thresh("master")

    try:
        id_dict = {
            skill: Credentials.workspace_id[active_adoption][skill]
            for skill in skill_list
        }
        timestr = generate_timestamp()  #  for use in all filenames

        # authenticate
        if "apikey" in instance_creds:
            logger.debug("Authenticating (apikey)")
            bs = blindset.blindset(
                apikey=instance_creds["apikey"],
                url=instance_creds["url"],
                version=conversation_version,
            )
        elif "password" in instance_creds:
            logger.debug("Authenticating (username/password)")
            bs = blindset.blindset(
                username=instance_creds["username"],
                password=instance_creds["password"],
                url=instance_creds["url"],
                version=conversation_version,
            )

        # check skills exist
        check_skills_exist(skill_list)

        #  import blindsets and generate master
        logger.info("Importing all blindsets and combining into master")
        blind_dict = dict()
        for skill in skill_list:
            bs_path = os.path.join(config.data_dir, f"{skill}_blindset.csv")
            blind_dict[skill] = bs.import_blindset(bs_path)

        master_blind_allcols = pd.concat(
            [v.assign(topic=k) for k, v in blind_dict.items()],
            axis=0,
            ignore_index=True,
            sort=False,
        )
        master_blind = master_blind_allcols[[
            "utterance", "topic"
        ]].rename(columns={"topic": "expected intent"})

        # generate master from topic training and push to WA
        logger.info("Getting training data from WA")
        train_dict = dict()
        for skill in skill_list:
            train_dict[skill] = wa_utils.get_training_data(
                bs.assistant, id_dict[skill])

        logger.info("Creating temporary master skill")
        master_train = pd.concat(
            [
                v.drop(columns=["intent"]).assign(intent=k)
                for k, v in train_dict.items()
            ],
            axis=0,
            ignore_index=True,
            sort=False,
        )
        master_skill_id = wa_utils.create_workspace_from_df(
            bs.assistant,
            name="master",
            train_df=master_train,
            description="generated by intent_training_tools",
        )

        # run blindset on master
        logger.info("Running blindset on master..")
        results_master = bs.run_blind_test(master_blind,
                                           master_skill_id,
                                           threshold=master_thresh)
        results_master["routing"] = results_master["intent1"]
        results_master.loc[results_master["confidence1"] < master_thresh,
                           "routing"] = "anything_else"

        # create blindsets for topics based on master results
        newblind_dict = dict()
        for skill in skill_list:
            # the blindset for each skill is made up of the utterances that the master routed to that skill
            blind_utterances = results_master.loc[
                (results_master["intent1"] == skill)
                & (results_master["confidence1"] >= master_thresh),
                "original_text", ].tolist()
            newblind = master_blind_allcols[master_blind_allcols["utterance"].
                                            isin(blind_utterances)].copy()
            newblind.loc[newblind["topic"] != skill,
                         "expected intent"] = "anything_else"
            newblind_dict[skill] = newblind[["utterance", "expected intent"
                                             ]].reset_index(drop=True)

        # run blindsets on topics
        logger.info("Running blindset on topic skills..")
        results_dict = dict()
        for skill in skill_list:
            results_dict[skill] = bs.run_blind_test(
                newblind_dict[skill],
                id_dict[skill],
                threshold=Credentials.calculate_workspace_thresh(skill),
            )

        #  plot confusion matrices
        if conf_matrix:
            from conversation_test.confusionmatrix import ConfusionMatrix

            conf_output_path = lambda s: os.path.join(
                config.output_folder, f"{s}_multi_confmat_{timestr}.png")

            # master
            cfn = ConfusionMatrix(workspace_thresh=master_thresh)
            cfn.create(results_master, fig_path=conf_output_path("master"))

            #  topics
            for skill in skill_list:
                cfn = ConfusionMatrix(workspace_thresh=Credentials.
                                      calculate_workspace_thresh(skill))
                cfn.create(results_dict[skill],
                           fig_path=conf_output_path(skill))

            logger.info("Confusion matrix saved to results folder")

        # calculate metrics
        # master
        met = Metrics(workspace_thresh=master_thresh)
        metrics_master, _ = met.get_all_metrics(results_master,
                                                detailed_results=True)

        # topics
        metrics_dict = dict()
        res_with_conf_dict = dict()
        for skill in skill_list:
            met = Metrics(
                workspace_thresh=Credentials.calculate_workspace_thresh(skill))
            metrics_dict[skill], res_with_conf_dict[
                skill] = met.get_all_metrics(results_dict[skill],
                                             detailed_results=True)

        # topics - create overall view as if it's a single skill
        topics_res_with_conf = pd.concat(
            [v for k, v in res_with_conf_dict.items()],
            ignore_index=True,
            sort=False)

        results_master.loc[results_master["routing"] == "anything_else",
                           'confusion'] = 'FN'

        topics_res_with_conf = topics_res_with_conf.append(
            results_master,
            ignore_index=True,
            sort=False,
        )
        metrics_overall = met.calculate_metrics_per_intent(
            topics_res_with_conf, detailed_results=True)

        metrics_overall.loc[metrics_overall.index.isin(skill_list),
                            'threshold'] = master_thresh
        metrics_overall = metrics_overall.rename(
            index={s: s + ' - anything else'
                   for s in skill_list})

        # export results
        for skill in skill_list:
            results_dict[skill].to_csv(
                os.path.join(config.output_folder,
                             f"{skill}_multi_results_{timestr}.csv"),
                index=None,
            )
            metrics_dict[skill].to_csv(
                os.path.join(config.output_folder,
                             f"{skill}_multi_metrics_{timestr}.csv"))

        results_master.to_csv(
            os.path.join(config.output_folder,
                         f"master_multi_results_{timestr}.csv"),
            index=None,
        )
        metrics_master.to_csv(
            os.path.join(config.output_folder,
                         f"master_multi_metrics_{timestr}.csv"))
        metrics_overall.to_csv(
            os.path.join(config.output_folder,
                         f"overall_multi_metrics_{timestr}.csv"))
        logger.info("Results and metrics saved to output folder")

        if save_master_data:
            # export master blindset with both intent and topic labels to CSV
            master_blind_allcols.to_csv(
                os.path.join(config.data_dir,
                             f"master_blindset_{timestr}.csv"),
                index=None,
            )

            # export master training to CSV
            master_train.to_csv(
                os.path.join(config.data_dir,
                             f"master_training_{timestr}.csv"),
                header=None,
                index=None,
            )

            logger.info(
                "Master blindset and training have also been saved to the data folder"
            )

        #  delete master skill
        logger.info("Deleting temporary master skill")
        wa_utils.delete_workspace(bs.assistant, master_skill_id)

    except Exception:
        if master_skill_id is not None:
            # make sure the temporary master skill gets deleted even on failure
            logger.info("Deleting temporary master skill before exit")
            wa_utils.delete_workspace(bs.assistant, master_skill_id)

        raise
Example #8
def main(input_name, output_name, skill_name, sample_limit):
    import config  # data and output dirs

    # Read the input file
    input_path = os.path.join(config.data_dir, input_name)
    logger.info("Using input {}".format(input_path))
    df = pd.read_csv(input_path, header=None)
    outputFrameP = pd.DataFrame()

    # create output path
    if output_name is not None and skill_name is not None:
        raise ValueError(
            "Please only specify one of output_name and skill_name.")

    skill_name = skill_name or 'test'

    gen_test_set_folder = os.path.join(config.data_dir, 'generated_test_sets')
    if output_name is not None:
        output_path = os.path.join(gen_test_set_folder, output_name)
    else:
        timestr = time.strftime("%Y%m%d-%H%M")
        output_path = os.path.join(
            gen_test_set_folder,
            f"{skill_name}_generator_output_{timestr}.csv")

    # for every row in the file
    logger.info("Splitting {} original utterances into permutations..".format(
        df.shape[0]))
    for n in range(df.shape[0]):
        # get the sentence and its intent
        sen = df.iat[n, 0]
        intent = df.iat[n, 1]
        # generate all the different sentences
        res = sentenceSplitter(sen)
        # flatten until flat (from nested lists to a single list)
        while any(isinstance(el, list) for el in res):
            res = flatten(res)
        # Distinguish between single elements and lists
        if isinstance(res, list):
            # list of generated sentences
            res = map(lambda x: (x, intent), res)
            outputFrame = pd.DataFrame(res)
            outputFrameP = outputFrameP.append(outputFrame)
        else:
            #single element
            res = [(res, intent)]
            outputFrameP = outputFrameP.append(res)

    # reduce number of samples in final test set, to limit API calls when running tests
    # TODO: better alternative than random?
    if sample_limit is not None:
        # Take stratified sample over intents, returning all samples in the intent if there aren't enough.
        # Make up for the difference by randomly sampling from the remaining records that haven't been chosen yet.
        logger.info("Returned {} utterance in total; reducing to {}".format(
            len(outputFrameP), sample_limit))

        samples_per_intent = np.floor(outputFrameP[1].value_counts() *
                                      sample_limit /
                                      len(outputFrameP)).apply(int)
        df_sampled = pd.DataFrame()

        for intent in outputFrameP[1].unique():
            df_intent = outputFrameP[outputFrameP[1] == intent]

            if samples_per_intent[intent] > len(df_intent):
                df_sampled = df_sampled.append(df_intent)
            else:
                df_sampled = df_sampled.append(
                    df_intent.sample(samples_per_intent[intent]))

        # make up for rounding errors
        lendiff = sample_limit - len(df_sampled)
        extra_records = pd.concat(
            [outputFrameP,
             df_sampled]).drop_duplicates(keep=False).sample(lendiff)
        df_sampled = df_sampled.append(extra_records)

        output_df = df_sampled

    else:
        output_df = outputFrameP

    # write the frame to CSV without the index
    output_df.to_csv(output_path, header=None, index=False)
    logger.info("{} new utterances saved to {}".format(len(output_df),
                                                       output_path))
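
For reference, the flatten-until-flat loop above deals with the arbitrarily nested lists that `sentenceSplitter` can return. A minimal sketch with an invented nested result, assuming `flatten` removes one level of nesting per call:

def flatten(nested):
    # assumed behaviour: flatten one level, which is why the loop re-checks for nesting
    return [item for el in nested
            for item in (el if isinstance(el, list) else [el])]

res = [["How do I pay", ["my bill", "my invoice"]], "How can I pay"]
while any(isinstance(el, list) for el in res):
    res = flatten(res)
# res -> ['How do I pay', 'my bill', 'my invoice', 'How can I pay']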
Example #9
def run_kfold(topic, no_folds, results_type, conf_matrix):
    """
    Runs kfold test using credentials in ../Credentials.py
    """

    # get credentials, import + export folders
    import Credentials
    active_adoption = Credentials.active_adoption
    instance_creds = Credentials.ctx[active_adoption]
    workspace_id = Credentials.workspace_id[active_adoption][topic]
    workspace_thresh = Credentials.calculate_workspace_thresh(topic)
    conversation_version = Credentials.conversation_version

    # import + export folders
    import config
    import time
    data_folder = config.data_dir
    export_folder = config.output_folder
    timestr = time.strftime("%Y%m%d-%H%M")

    output_loc_results = os.path.join(
        export_folder, "{}_kfold_results_raw_{}.csv".format(topic, timestr))
    output_loc_metrics = os.path.join(
        export_folder, "{}_kfold_results_metrics_{}.csv".format(topic, timestr))
    output_loc_confmat = os.path.join(
        export_folder, "{}_kfold_confmat_{}.png".format(topic, timestr))

    # authenticate
    if 'apikey' in instance_creds:
        logger.debug("Authenticating (apikey)")
        kf = kfoldtest(n_folds=no_folds, apikey=instance_creds['apikey'],
                       url=instance_creds['url'], threshold=workspace_thresh, version=conversation_version)
    elif 'password' in instance_creds:
        logger.debug("Authenticating (username/password)")
        kf = kfoldtest(n_folds=no_folds, username=instance_creds['username'], password=instance_creds['password'], url=instance_creds['url'], threshold=workspace_thresh,
                       version=conversation_version)

    # get train df from watson + check there are sufficient workspaces to run the test
    train_df = kf.intent_df_from_watson(workspace_id)
    kf.check_sufficient_workspaces()

    # create folds in WA if above is true
    folds = kf.create_folds(method='kfold')
    kf.create_kfold_WA(folds)

    available_flag = False

    while not available_flag:
        logger.info("Checking workspaces..")
        available_flag = kf.check_workspaces_status()
        time.sleep(20)

    # run kfold test
    try:
        results = kf.run_kfold_test(folds)

        if (results_type == 'raw') or (results_type == 'all'):
            results.to_csv(output_loc_results)

        classification_report = kf.create_classification_report(results)

        if (results_type == 'metrics') or (results_type == 'all'):
            metrics = Metrics(workspace_thresh)
            metric_df = metrics.get_all_metrics_CV(
                results, fold_col='fold', detailed_results=False)
            metric_df.to_csv(output_loc_metrics)

        # confusion matrix
        if conf_matrix:
            from confusionmatrix import ConfusionMatrix
            cfn = ConfusionMatrix(workspace_thresh=workspace_thresh)
            cfn.create(results, fig_path=output_loc_confmat)
            logger.info("Confusion matrix saved to {}".format(
                output_loc_confmat))

    finally:
        # regardless of what happens above, delete the temporary workspaces before exiting
        kf.delete_kfold_workspaces()
Example #10
def run_blindset(topic, results_type, conf_matrix, blindset_name):
    """
    Runs blindset test using credentials in ../Credentials.py
    """

    # get credentials, import + export folders
    import Credentials
    active_adoption = Credentials.active_adoption
    instance_creds = Credentials.ctx[active_adoption]

    workspace_id = Credentials.workspace_id[active_adoption][topic]
    workspace_thresh = Credentials.calculate_workspace_thresh(topic)
    conversation_version = Credentials.conversation_version

    # import + export folders
    import config
    import time
    data_folder = config.data_dir
    export_folder = config.output_folder
    timestr = time.strftime("%Y%m%d-%H%M")

    blindset_name = blindset_name or topic + "_blindset.csv"
    output_loc_results = os.path.join(
        export_folder, "{}_results_raw_{}.csv".format(topic, timestr))
    output_loc_metrics = os.path.join(
        export_folder, "{}_results_metrics_{}.csv".format(topic, timestr))
    output_loc_confmat = os.path.join(
        export_folder, "{}_confmat_{}.png".format(topic, timestr))

    # authenticate
    if 'apikey' in instance_creds:
        logger.debug("Authenticating (apikey)")
        bs = blindset(apikey=instance_creds['apikey'],
                      url=instance_creds['url'],
                      threshold=workspace_thresh,
                      version=conversation_version)
    elif 'password' in instance_creds:
        logger.debug("Authenticating (username/password)")
        bs = blindset(username=instance_creds['username'],
                      password=instance_creds['password'],
                      url=instance_creds['url'],
                      threshold=workspace_thresh,
                      version=conversation_version)

    # run test
    blindset_df = bs.import_blindset(os.path.join(data_folder, blindset_name))
    # TODO: check blindset df
    results = bs.run_blind_test(blindset_df, workspace_id)

    # exports + metrics
    if (results_type == 'raw') or (results_type == 'all'):
        cols_export = [
            col for col in results.columns.values if col != 'intent_correct'
        ]
        results[cols_export].to_csv(output_loc_results, encoding='utf-8')
        logger.info("Raw results exported to {}".format(output_loc_results))

    if (results_type == 'metrics') or (results_type == 'all'):
        met = Metrics(workspace_thresh)
        metric_df, _ = met.get_all_metrics(results, detailed_results=True)

        metric_df.to_csv(output_loc_metrics, encoding='utf-8')
        logger.info(
            "Metrics per intent exported to {}".format(output_loc_metrics))

    # confusion matrix
    if conf_matrix:
        from confusionmatrix import ConfusionMatrix
        cfn = ConfusionMatrix(workspace_thresh=workspace_thresh)
        cfn.create(results, fig_path=output_loc_confmat)
        logger.info("Confusion matrix saved to {}".format(output_loc_confmat))

    # print high-level metrics
    overall_metrics = bs.calculate_overall_metrics(results,
                                                   av_method="weighted")
    logger.info("Overall metrics for the workspace (weighted):")
    logger.info(overall_metrics)
Example #11
    def run_blind_test(self, test_set_df, workspace_id, **kwargs):
        """
        Runs blind set test and returns results df.

        Parameter: 
            test_set_df: the regression_test in csv format

        Return: 
            results: a Pandas dataframe with `original text`, `predicted intent` and also the results from WA
        """

        # if no threshold has been passed into the object, take one from the function args
        if self.threshold == False and 'threshold' not in kwargs:
            raise ValueError(
                "Must provide a threshold either to the blindset object or this function."
            )
        elif 'threshold' in kwargs:
            # threshold in function args overwrites one provided to the object, even if one has been set
            threshold = kwargs['threshold']
        else:
            threshold = self.threshold

        results = pd.DataFrame(columns=[
            'original_text', 'expected intent', 'r@1', 'TP', 'intent1',
            'confidence1', 'intent2', 'confidence2', 'intent3', 'confidence3'
        ])
        logger.info("Running blind test...")
        for i in tqdm(range(len(test_set_df))):

            text = test_set_df["utterance"][i]
            response = self.assistant.message(
                workspace_id=workspace_id,
                input={'text': text},
                context={'metadata': {
                    'user_id': 'intent_test_user1'
                }},
                alternate_intents=True)
            dumps = json.dumps(response.get_result(), indent=2)

            data = json.loads(dumps)

            no_intents = len(data['intents'])

            intent1 = data['intents'][0]['intent']
            confidence1 = data['intents'][0]['confidence']

            if no_intents >= 2:
                intent2 = data['intents'][1]['intent']
                confidence2 = data['intents'][1]['confidence']
            else:
                intent2 = confidence2 = ""

            if no_intents >= 3:
                intent3 = data['intents'][2]['intent']
                confidence3 = data['intents'][2]['confidence']
            else:
                intent3 = confidence3 = ""

            r_1 = (test_set_df["expected intent"][i] == intent1)
            tp = r_1 and (confidence1 >= threshold)
            results = results.append(
                {
                    'original_text': test_set_df["utterance"][i],
                    'expected intent': test_set_df["expected intent"][i],
                    'r@1': 1 * r_1,
                    'TP': 1 * tp,
                    'intent1': intent1,
                    'confidence1': confidence1,
                    'intent2': intent2,
                    'confidence2': confidence2,
                    'intent3': intent3,
                    'confidence3': confidence3,
                },
                ignore_index=True)

        results["intent_correct"] = results["intent1"]
        results["intent_correct"] = np.where(
            (results["confidence1"] < self.threshold), "BELOW_THRESHOLD",
            results["intent1"])

        return results
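
A hedged usage sketch: assuming `bs` is a `blindset` object authenticated as in Example #10, the returned frame gives quick accuracy figures directly (`r@1` is 1 whenever the top intent matches the expected intent; `TP` additionally requires the confidence to clear the threshold).

# Usage sketch -- the column names come from the function above; the file path
# and workspace ID are placeholders.
test_set_df = bs.import_blindset("data/topic_blindset.csv")
results = bs.run_blind_test(test_set_df, "YOUR_WORKSPACE_ID", threshold=0.4)

top1_accuracy = results["r@1"].mean()   # correct top intent, any confidence
tp_rate = results["TP"].mean()          # correct top intent and above threshold
print("R@1: {:.2%}, TP rate: {:.2%}".format(top1_accuracy, tp_rate))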