def _perform_pre_processing(self, bot_tweet, doc_tweet, length_valid=3):
        """Clean a bot/doc tweet pair and report whether both are long enough.

        Both tweets are passed through ``Utils.preprocess_tweet``. The pair is
        considered valid when ``len()`` of each cleaned tweet is at least
        ``length_valid``.

        Args:
            bot_tweet: Raw tweet from the bot side of the pair.
            doc_tweet: Raw tweet from the doc side of the pair.
            length_valid: Minimum ``len()`` required of each cleaned tweet
                (default 3). Whether that counts characters or tokens
                depends on what ``Utils.preprocess_tweet`` returns
                -- TODO confirm.

        Returns:
            A ``(bot_tweet, doc_tweet, is_valid)`` tuple: the two cleaned
            tweets followed by a bool that is True only when both meet the
            minimum length.
        """
        bot_tweet = Utils.preprocess_tweet(bot_tweet)
        doc_tweet = Utils.preprocess_tweet(doc_tweet)

        # The comparison already yields a bool; store it under its own name
        # instead of overwriting the integer threshold parameter.
        is_valid = (len(bot_tweet) >= length_valid
                    and len(doc_tweet) >= length_valid)

        return bot_tweet, doc_tweet, is_valid
# Example #2
# 0
    def predict(self):
        """Score ``self.tweet_pred`` against every entry of ``self.bot_list``.

        The prediction tweet is cleaned, converted to a sequence with
        ``self.tokenizer`` and repeated once per bot entry so that it lines up
        with ``self.x_bot_list``. The model is then run on the bot sequences,
        the repeated tweet sequences and the additional feature. The share of
        model outputs strictly greater than 0.5 is stored in
        ``self.bot_similarity_score`` (0.0 when the model returns no outputs).

        Raises:
            ValueError: If ``self.tweet_pred`` is None.
        """
        if self.tweet_pred is None:
            raise ValueError(
                'Can not Start Predicting without any Prediction Tweet!')

        # Clean the tweet with the same helper used elsewhere in this class.
        clean_tweet_pred = Utils.preprocess_tweet(self.tweet_pred)

        # One copy of the cleaned tweet per bot entry, for the overlap features.
        tweet_pred_list = [clean_tweet_pred] * len(self.bot_list)

        # Convert the single cleaned tweet to a sequence once ...
        temp_pred_list = [clean_tweet_pred]
        x_temp_pred_list = Utils.convert_text_to_sequences(
            self.tokenizer, temp_pred_list, self.max_text_len)

        # ... then repeat that sequence so it has one row per bot entry.
        x_doc_list = [x_temp_pred_list[0]] * len(self.bot_list)
        x_doc_list = np.array(x_doc_list)

        # Word-overlap feature, or zeros when the feature is switched off.
        if self.additional_feats_enabled:
            additional_feat = Utils.compute_overlap_features(
                self.bot_list, tweet_pred_list)
        else:
            additional_feat = np.zeros(len(self.bot_list))

        # Run the model on the three inputs.
        predict_list = self.model.predict(
            [self.x_bot_list, x_doc_list, additional_feat],
            verbose=1,
            callbacks=[self.callback_predict])

        # Fraction of outputs above the 0.5 threshold, i.e. how similar the
        # tweet is to the bot list. Assumes one output value per bot entry
        # (shape (n,) or (n, 1)) -- TODO confirm against the model definition.
        scores = np.asarray(predict_list).ravel()
        if scores.size:
            # float() guarantees true division and a plain Python float.
            self.bot_similarity_score = (
                float(np.count_nonzero(scores > 0.5)) / scores.size)
        else:
            # No outputs to compare against: avoid dividing by zero.
            self.bot_similarity_score = 0.0