def _perform_pre_processing(self, bot_tweet, doc_tweet, length_valid=3):
    """Pre-process both tweets and report whether each is long enough.

    Args:
        bot_tweet: Raw bot tweet; passed through ``Utils.preprocess_tweet``.
        doc_tweet: Raw document tweet; passed through
            ``Utils.preprocess_tweet``.
        length_valid: Minimum ``len()`` each pre-processed tweet must reach
            (default 3).

    Returns:
        A ``(bot_tweet, doc_tweet, is_valid)`` tuple: the two pre-processed
        tweets followed by a bool that is True only when both of them meet
        the minimum length.
    """
    # Keep the threshold under its own name instead of rebinding the
    # parameter from an int to a bool halfway through the function.
    min_length = length_valid

    clean_bot = Utils.preprocess_tweet(bot_tweet)
    clean_doc = Utils.preprocess_tweet(doc_tweet)

    # NOTE(review): what len() counts here (characters vs. tokens) depends on
    # what Utils.preprocess_tweet returns - confirm against that helper.
    is_valid = bool(
        len(clean_bot) >= min_length and len(clean_doc) >= min_length
    )
    return clean_bot, clean_doc, is_valid
def predict(self):
    """Score the pending tweet against every entry of the bot list.

    The tweet in ``self.tweet_pred`` is pre-processed, converted to a
    sequence and repeated once per entry of ``self.bot_list`` so it can be
    fed to ``self.model.predict`` alongside ``self.x_bot_list``. The fraction
    of model outputs greater than 0.5 is stored in
    ``self.bot_similarity_score``; nothing is returned.

    Raises:
        Exception: If ``self.tweet_pred`` is None.
    """
    if self.tweet_pred is None:
        raise Exception(
            'Can not Start Predicting without any Prediction Tweet!')

    cleaned_tweet = Utils.preprocess_tweet(self.tweet_pred)
    bot_count = len(self.bot_list)

    # Encode the single tweet once, then repeat that one sequence so the
    # document input has as many rows as there are bot entries.
    encoded = Utils.convert_text_to_sequences(
        self.tokenizer, [cleaned_tweet], self.max_text_len)
    x_doc_list = np.array([encoded[0]] * bot_count)

    # Third model input: word-overlap features when enabled, otherwise a
    # zero vector with one slot per bot entry.
    if self.additional_feats_enabled:
        additional_feat = Utils.compute_overlap_features(
            self.bot_list, [cleaned_tweet] * bot_count)
    else:
        additional_feat = np.zeros(bot_count)

    # NOTE(review): the verbose/callbacks keywords suggest a Keras-style
    # model - confirm where self.model is built.
    scores = self.model.predict(
        [self.x_bot_list, x_doc_list, additional_feat],
        verbose=1,
        callbacks=[self.callback_predict])

    # Share of predictions above the 0.5 cut-off, i.e. how similar the
    # tweet is to the bot list as a whole.
    above_cutoff = sum(1 for score in scores if score > 0.5)
    self.bot_similarity_score = above_cutoff / len(scores)