def start_predict(self):
        """Validate the form input and start a multi-tweet prediction.

        Reads the model name, tweets file path, header flag and the number
        of random tweets from the UI. When ``Utils.file_validation`` raises,
        the first exception argument is shown in an "Input Error" dialog and
        nothing is started. Otherwise the form is reset, the widgets are
        disabled, and a ``ModelPredictorThread`` wrapping a new
        ``MultiPredictor`` is started with ``on_predict_finished`` connected
        to its ``finished`` signal.
        """
        form = self.ui
        chosen_model = str(form.combobox_model.currentText())
        tweets_path = form.textbox_tweets_file.text()
        has_header = form.checkbox_header.isChecked()
        sample_size = form.spinbox_rand_tweets.value()

        # Bail out before touching the form if the tweets file is rejected.
        try:
            Utils.file_validation(tweets_path, 'Tweet')
        except Exception as error:
            Utils.show_msg(text=error.args[0], title="Input Error")
            return

        # Clear previous values, then disable the widgets for the run.
        self.reset_form()
        self.disable_widgets(True)

        self.model_callback = CallBackMultiPredictNNet(
            self.update_batch_progress,
            self.update_tweet_progress,
            sample_size,
        )
        self.predictor = MultiPredictor(
            chosen_model,
            self.model_callback,
            tweets_path,
            has_header,
            sample_size,
        )

        # Hand the predictor to its thread and get notified when it finishes.
        worker = ModelPredictorThread(self.predictor)
        self.pred_thread = worker
        worker.finished.connect(self.on_predict_finished)
        worker.start()
    def _perform_pre_processing(self, bot_tweet, doc_tweet, length_valid=3):
        """Pre-process a bot/document tweet pair and check their lengths.

        Both tweets are passed through ``Utils.preprocess_tweet``.

        :param bot_tweet: raw bot tweet.
        :param doc_tweet: raw document tweet.
        :param length_valid: minimum ``len()`` each processed tweet must
            reach (default 3).
        :return: tuple ``(processed_bot, processed_doc, is_valid)`` where
            ``is_valid`` is ``True`` only when both processed tweets have a
            length of at least ``length_valid``.
        """
        min_length = length_valid
        clean_bot = Utils.preprocess_tweet(bot_tweet)
        clean_doc = Utils.preprocess_tweet(doc_tweet)

        both_long_enough = all(
            len(tweet) >= min_length for tweet in (clean_bot, clean_doc))

        return clean_bot, clean_doc, both_long_enough
 def export_finished(self):
     """Report the outcome of the export thread to the user.

     On failure the thread's ``error`` is shown in a critical dialog.
     On success an information dialog is shown and the file at
     ``self.export_thread.excel_path`` is opened with ``os.startfile``.
     """
     worker = self.export_thread

     if not worker.is_success():
         Utils.show_msg(text=worker.error,
                        title="Error",
                        msg_type=QMessageBox.Critical)
         return

     Utils.show_msg(text="Exporting Complete!",
                    title="Successful",
                    msg_type=QMessageBox.Information)
     # NOTE: os.startfile is only available on Windows.
     os.startfile(worker.excel_path)
Пример #4
0
    def on_train_finished(self):
        """Handle completion of the training thread.

        * Failure: show the thread's error in an "Error" dialog.
        * Success after a stop request: re-enable the log and record that
          the stop completed.
        * Plain success: enable the save button and log completion.

        In every case ``change_widgets_disabled(False)`` is called last.
        """
        if not self.model_thread.is_success():
            Utils.show_msg(text=self.model_thread.error, title="Error")
        elif self.stop_requested:
            self.log.enable_log()
            self.log.write_log('Stopped Process Done Successfully!')
        else:
            self.ui.btn_save.setDisabled(False)
            self.log.write_log('Training Process Completed Successfully!')

        self.change_widgets_disabled(False)
    def on_predict_finished(self):
        """Handle completion of the prediction thread.

        ``disable_widgets(False)`` is always called first. If no stop was
        requested and the thread succeeded, the tweets are classified and
        nothing else happens. Otherwise the form is reset (stop requested)
        or the thread's error is shown (failure), and in both of those
        cases the save button and the threshold group box are disabled.
        """
        self.disable_widgets(False)

        if self.need_stop:
            self.reset_form()
        elif self.pred_thread.is_success():
            self.classify_tweets()
            return
        else:
            Utils.show_msg(text=self.pred_thread.error, title="Error")

        # Reached only when there is no usable prediction result.
        self.ui.btn_save.setDisabled(True)
        self.ui.groupbox_threshold.setDisabled(True)
Пример #6
0
    def start_predict(self):
        """Validate the form input and start a bot/human test prediction.

        Reads the model name, the bot and human tweet files and the number
        of tweets to take from each. When ``Utils.file_validation`` raises
        for either file, the first exception argument is shown in an
        "Input Error" dialog and nothing is started. Otherwise the form is
        reset, the widgets are disabled, and a ``ModelPredictorThread``
        wrapping a new ``ModelTestPredictor`` is started with
        ``on_predict_finished`` connected to its ``finished`` signal.
        """
        form = self.ui
        chosen_model = str(form.combobox_model.currentText())
        bot_path = form.textbox_bot_file.text()
        human_path = form.textbox_human_file.text()
        bot_count = form.spinbox_bot_tweets.value()
        human_count = form.spinbox_human_tweets.value()
        # The progress callback is given the combined number of tweets.
        combined_count = bot_count + human_count

        # Bail out before touching the form if either file is rejected.
        try:
            Utils.file_validation(bot_path, 'Tweet')
            Utils.file_validation(human_path, 'Tweet')
        except Exception as error:
            Utils.show_msg(text=error.args[0], title="Input Error")
            return

        # Clear previous values, then disable the widgets for the run.
        self.reset_form()
        self.disable_widgets(True)

        self.model_callback = CallBackMultiPredictNNet(
            self.update_batch_progress,
            self.update_tweet_progress,
            combined_count,
        )
        self.predictor = ModelTestPredictor(
            chosen_model,
            self.model_callback,
            bot_path,
            human_path,
            bot_count,
            human_count,
        )

        # Hand the predictor to its thread and get notified when it finishes.
        worker = ModelPredictorThread(self.predictor)
        self.pred_thread = worker
        worker.finished.connect(self.on_predict_finished)
        worker.start()
    def on_predict_finished(self):
        """Show the similarity result of a finished single-tweet prediction.

        On failure the thread's error is shown in an "Error" dialog. On
        success the predictor's similarity score is turned into a rounded
        bot percentage and its complement (human percentage), the scores
        are pushed to the UI, and the group box of the larger percentage is
        enabled while the other one is disabled. On a tie ``np.argmax``
        returns the first index, so the bot group box is the enabled one.

        The start button, tweet text box and model combobox are made
        usable again in every case.
        """
        if not self.pred_thread.is_success():
            Utils.show_msg(text=self.pred_thread.error, title="Error")
        else:
            similarity = self.predictor.get_similarity_score()
            bot_percentage = int(round(similarity * 100))
            human_percentage = 100 - bot_percentage

            # Index 0 is the bot box, index 1 the human box.
            boxes = (self.ui.groupbox_bot, self.ui.groupbox_human)
            winner = np.argmax([bot_percentage, human_percentage])

            self.update_ui_scores(bot_percentage, human_percentage)

            boxes[winner].setDisabled(False)
            boxes[1 - winner].setDisabled(True)

        self.ui.btn_start.setDisabled(False)
        self.ui.textbox_tweet.setReadOnly(False)
        self.ui.combobox_model.setDisabled(False)
Пример #8
0
    def predict(self):
        """Score ``self.tweet_pred`` against every entry of the bot list.

        The tweet is pre-processed, converted to a sequence once and
        repeated so it lines up with ``self.x_bot_list``. The model is then
        asked to predict on ``[x_bot_list, repeated tweet, extra feature]``
        where the extra feature is the overlap feature when
        ``self.additional_feats_enabled`` is set and zeros otherwise.

        The fraction of predictions greater than 0.5 is stored in
        ``self.bot_similarity_score``.

        :raises Exception: when ``self.tweet_pred`` is ``None``.
        """
        if self.tweet_pred is None:
            raise Exception(
                'Can not Start Predicting without any Prediction Tweet!')

        cleaned_tweet = Utils.preprocess_tweet(self.tweet_pred)
        bot_count = len(self.bot_list)

        # Encode the single tweet once, then repeat it for each bot entry.
        encoded = Utils.convert_text_to_sequences(
            self.tokenizer, [cleaned_tweet], self.max_text_len)
        x_doc_list = np.array([encoded[0]] * bot_count)

        # Overlap feature between each bot tweet and the predicted tweet,
        # or a zero placeholder when the feature is switched off.
        if self.additional_feats_enabled:
            additional_feat = Utils.compute_overlap_features(
                self.bot_list, [cleaned_tweet] * bot_count)
        else:
            additional_feat = np.zeros(bot_count)

        predictions = self.model.predict(
            [self.x_bot_list, x_doc_list, additional_feat],
            verbose=1,
            callbacks=[self.callback_predict])

        # Share of bot entries the tweet was predicted to be similar to.
        similar_count = sum(1 for score in predictions if score > 0.5)
        self.bot_similarity_score = similar_count / len(predictions)
    def _generate_additional_feats(self, query_list, doc_list,
                                   additional_feats_enabled):
        """Populate ``self.overlap_feats`` for the query/document pairs.

        When ``additional_feats_enabled`` is true the features come from
        ``Utils.compute_overlap_features(query_list, doc_list)``; otherwise
        a 1-D zero array with one entry per query is used. Either choice is
        logged.

        ``self.addit_feat_len`` is set to the second dimension of the
        features only when they have more than one dimension; for 1-D
        features it is left as it was.
        """
        if not additional_feats_enabled:
            self.logger.write_log(
                f'Additional features disabled - building not needed')
            feats = np.zeros(len(query_list))
        else:
            self.logger.write_log(
                f'Building additional features between queries and documents')
            feats = Utils.compute_overlap_features(query_list, doc_list)

        self.overlap_feats = feats

        # Only multi-dimensional features carry a per-sample feature length.
        if feats.ndim > 1:
            self.addit_feat_len = feats.shape[1]
Пример #10
0
    def _build_users_dict(self, query_file):
        """Group the lines of ``query_file`` by the user they belong to.

        ``Utils.get_user_from_tweet`` is asked for a user on every line.
        A line that yields a user switches the current user; a line that
        yields ``None`` is attributed to the most recent user. Lines seen
        before any user was found are skipped.

        :param query_file: iterable of tweet lines.
        :return: dict mapping each user to the list of its lines, in the
            order they were read (duplicates are kept).
        """
        self.logger.write_log(
            f'Building dictionary of each user and its tweets')

        tweets_by_user = {}
        active_user = None

        for tweet_line in query_file:
            found_user = Utils.get_user_from_tweet(tweet_line)
            if found_user is not None:
                active_user = found_user

            if active_user is None:
                # No user has been seen yet, nothing to attach this line to.
                continue

            tweets_by_user.setdefault(active_user, []).append(tweet_line)

        return tweets_by_user
Пример #11
0
    def start_train(self):
        """Validate the training form and start training in a worker thread.

        Reads all training parameters from the UI, then validates, in this
        order: the dataset generation method, the early-stop value and the
        three input files. Any validation failure is reported in an
        "Input Error" dialog and the method returns without changing the
        UI state.

        When validation passes, the progress bars and graphs are reset, the
        widgets (including the save button) are disabled, and a
        ``ModelTrainerThread`` wrapping a new ``ModelTrainer`` is started
        with ``on_train_finished`` connected to its ``finished`` signal.
        """
        # get training parameters
        embedding_file = self.ui.textbox_embed.text()
        bot_file = self.ui.textbox_bot.text()
        human_file = self.ui.textbox_human.text()
        train_split = self.ui.slider_train.value() / 100.0
        test_split = 1 - train_split
        val_split = self.ui.slider_val.value() / 100.0
        epoches = self.ui.spinbox_epoches.value()
        batch_size = self.ui.spinbox_batch.value()
        addit_feat_enabled = self.ui.checkbox_additional_feats.isChecked()
        early_stop = self.ui.spinbox_earlystop.value()

        # get dataset config from combobox
        dataset_configs = {
            "User Grouping": DatasetConfig.USER_STATE,
            "Random Pairing": DatasetConfig.RANDOM_STATE,
        }
        gen_method = str(self.ui.combobox_gen_method.currentText())
        if gen_method not in dataset_configs:
            # Without this guard an unexpected combobox entry would leave the
            # config unbound and crash below, after the widgets have already
            # been disabled.
            Utils.show_msg(
                text=f"Unknown Dataset Generation Method: {gen_method}",
                title="Input Error")
            return
        dataset_config = dataset_configs[gen_method]

        # Check for early stop validity
        if early_stop > epoches:
            Utils.show_msg(
                text=
                "Can not Insert Early Stop Epochs\nThat Bigger Than Training Epochs Number!",
                title="Input Error")
            return

        # check for files validity
        try:
            Utils.file_validation(embedding_file, 'Embedding')
            Utils.file_validation(bot_file, 'Bot')
            Utils.file_validation(human_file, 'Human')
        except Exception as ex:
            Utils.show_msg(text=ex.args[0], title="Input Error")
            return

        # reset progressbars
        self.ui.progressbar_epoches.setValue(0)
        self.ui.progressbar_batch.setValue(0)

        # reset graphs
        self.reset_graphs()

        # disable unnecessary widgets when starting training
        self.change_widgets_disabled(True)
        self.ui.btn_save.setDisabled(True)

        self.log.write_log("Start pre-training phase...")

        # create model instance with all parameters
        self.custom_callback = CallBackTrainNNet(self.log, self.draw_graphs,
                                                 self.batch_graphs_clear,
                                                 self.update_progressbars,
                                                 self.get_status_stopped,
                                                 self.MAX_BATCH,
                                                 self.update_batch_range)

        self.model = ModelTrainer(logger=self.log,
                                  embedding_file=embedding_file,
                                  bots_file=bot_file,
                                  human_file=human_file,
                                  validation_split=val_split,
                                  test_split=test_split,
                                  batch_size=batch_size,
                                  epochs=epoches,
                                  additional_feats_enabled=addit_feat_enabled,
                                  early_stopping=early_stop,
                                  dataset_config=dataset_config,
                                  custom_callback=self.custom_callback)

        # create a thread for training phase
        self.model_thread = ModelTrainerThread(self.model)
        self.model_thread.finished.connect(self.on_train_finished)
        self.model_thread.start()  # run the thread to start training
Пример #12
0
    def train_model(self):
        """Build the dataset, prepare the inputs and fit the model.

        Steps, in order:

        1. Build the dataset from the bot and human files.
        2. Split it into train and test sets and log their sizes.
        3. Convert the query/document texts of both sets to sequences.
        4. Keep the unique label-1 training queries (text and sequences)
           and the raw test set on the instance for later prediction.
        5. Create the model and fit it on the training data using the
           configured epochs, batch size, validation split and callbacks.
        """
        dataset = self.dataset
        dataset.perform_build(self.bots_file, self.human_file,
                              self.additional_feats_enabled)

        self.logger.write_log('Splitting datasets into train and test sets')

        train_part, test_part = self._split_train_test_sets()
        q_train, d_train, addn_feat_train, y_train = train_part
        q_test, d_test, addn_feat_test, y_test = test_part

        self.logger.write_log(f'trains samples: {len(q_train)}')
        self.logger.write_log(f'test samples: {len(q_test)}')

        # Parameters of the built dataset that the model depends on.
        vocabulary = dataset.tokenizer.index_word
        max_text_len = dataset.max_text_len
        addit_feat_len = dataset.addit_feat_len
        tokenizer = dataset.tokenizer

        def to_sequences(texts):
            # Same tokenizer and length for every conversion below.
            return Utils.convert_text_to_sequences(tokenizer, texts,
                                                   max_text_len)

        self.logger.write_log('convert texts to sequences')
        x_q_train = to_sequences(q_train)
        x_d_train = to_sequences(d_train)
        x_q_test = to_sequences(q_test)
        x_d_test = to_sequences(d_test)

        # Data kept on the instance for the prediction phase.
        self.bot_tweets = self._get_unique_matches(q_train, y_train)
        self.x_bot_tweets = to_sequences(self.bot_tweets)

        self.bot_test_tweets = q_test
        self.doc_test_tweets = d_test
        self.labels_test = y_test

        self.model = self._create_model(vocabulary, max_text_len,
                                        addit_feat_len)

        self.logger.write_log(f'Start training process..')

        # Inputs are passed as [queries, documents, additional features].
        train_inputs = [
            np.array(x_q_train),
            np.array(x_d_train),
            np.array(addn_feat_train),
        ]
        self.model.fit(train_inputs,
                       np.array(y_train),
                       epochs=self.epochs,
                       batch_size=self.batch_size,
                       verbose=1,
                       validation_split=self.validation_split,
                       callbacks=self._get_callbacks())
Пример #13
0
 def _get_unique_matches(self, query_train, label_train):
     """Return the unique queries whose label equals 1.

     Queries and labels are paired positionally; pairing stops at the
     shorter of the two inputs. The selected queries are passed through
     ``Utils.remove_duplicates``.

     When no label equals 1 an empty list is handed to
     ``Utils.remove_duplicates`` instead of failing while unpacking an
     empty "unzip" result.

     :param query_train: iterable of training queries.
     :param label_train: iterable of labels aligned with ``query_train``.
     :return: whatever ``Utils.remove_duplicates`` returns for the
         selected queries.
     """
     bot_matches = [
         query for query, label in zip(query_train, label_train)
         if label == 1
     ]
     return Utils.remove_duplicates(bot_matches)