def _generate_additional_feats(self, query_list, doc_list,
                                   additional_feats_enabled):
        """Populate ``self.overlap_feats`` for the given queries and documents.

        When ``additional_feats_enabled`` is truthy, the features are whatever
        ``Utils.compute_overlap_features(query_list, doc_list)`` returns
        (presumably a NumPy array -- it must expose ``ndim`` and ``shape``).
        Otherwise a 1-D zero vector with one entry per query is stored as a
        placeholder.

        ``self.addit_feat_len`` is set to the size of the second axis only
        when the stored features have more than one dimension; for 1-D
        features it is left untouched.
        """
        if not additional_feats_enabled:
            self.logger.write_log(
                'Additional features disabled - building not needed')
            feats = np.zeros(len(query_list))
        else:
            self.logger.write_log(
                'Building additional features between queries and documents')
            feats = Utils.compute_overlap_features(query_list, doc_list)
        self.overlap_feats = feats

        # only a matrix-shaped result carries a per-sample feature width
        if feats.ndim > 1:
            self.addit_feat_len = feats.shape[1]
# Example #2
# 0
    def predict(self):
        """Score ``self.tweet_pred`` against every entry of ``self.bot_list``.

        The tweet is pre-processed, converted to a sequence and repeated once
        per bot entry so it can be fed to ``self.model`` together with
        ``self.x_bot_list`` and the additional-feature input. The fraction of
        model outputs greater than 0.5 is stored in
        ``self.bot_similarity_score``.

        Raises:
            Exception: if ``self.tweet_pred`` is ``None``.
        """
        if self.tweet_pred is None:
            raise Exception(
                'Can not Start Predicting without any Prediction Tweet!')

        # clean the tweet once; every per-bot copy below reuses this value
        cleaned_tweet = Utils.preprocess_tweet(self.tweet_pred)
        bot_count = len(self.bot_list)

        # encode the tweet as a one-element batch, then repeat its sequence
        # so there is one row per bot entry
        encoded_batch = Utils.convert_text_to_sequences(
            self.tokenizer, [cleaned_tweet], self.max_text_len)
        x_doc_list = np.array([encoded_batch[0]] * bot_count)

        # word-overlap features, or a zero placeholder when disabled
        if self.additional_feats_enabled:
            additional_feat = Utils.compute_overlap_features(
                self.bot_list, [cleaned_tweet] * bot_count)
        else:
            additional_feat = np.zeros(bot_count)

        model_inputs = [self.x_bot_list, x_doc_list, additional_feat]
        predict_list = self.model.predict(
            model_inputs, verbose=1, callbacks=[self.callback_predict])

        # share of predictions above the 0.5 threshold
        above_threshold = sum(1 for score in predict_list if score > 0.5)
        self.bot_similarity_score = above_threshold / len(predict_list)