def predict(self, test_input, input_type, test_case_count=25):
    """Predict tags for questions and log the results.

    Parameters
    ----------
    input_type : str
        ``'RANDOM_INPUT'`` treats *test_input* as an iterable of free-form
        question strings and logs a prediction per question.  Any other
        value treats *test_input* as the indexable test set: a random
        sample of *test_case_count* questions is drawn and predictions
        are logged next to the ground-truth labels.
    test_input : iterable / indexable of question strings (see above).
    test_case_count : int
        Number of random samples drawn in the comparison branch.

    Returns
    -------
    None — output goes to ``logging.debug``.
    """
    normalize = Normalize()
    if input_type == 'RANDOM_INPUT':
        input_count = 0
        for question in test_input:
            input_count += 1
            # normalize() cleans the raw question text before prediction
            question_ = normalize.normalize(question)
            logging.debug('Test Case No.{}: {}'.format(
                input_count, str(question)))
            logging.debug('-' * (len(question) + 16))
            logging.debug('Predicted Tags: {}'.format(
                self.tag_predictor(question_)))
            logging.debug('')
    else:
        test_idx = np.random.randint(len(test_input), size=test_case_count)
        logging.debug("Predicted Vs Ground Truth for {} sample".format(
            test_case_count))
        logging.debug('-' * 50)
        logging.debug('')
        input_count = 0
        for idx in test_idx:
            input_count += 1
            test_case = idx
            # BUG FIX: the original indexed the undefined globals
            # ``X_test`` / ``y_test``.  Use the ``test_input`` argument and
            # the instance's ``self._y_test`` instead, matching the sibling
            # predict() implementation in this file.
            question = str(test_input[test_case])
            logging.debug('Test Case No.{}: {}'.format(
                input_count, question))
            logging.debug('-' * 100)
            logging.debug("Question ID: {}".format(test_case))
            logging.debug('Predicted: ' + str(
                self.tag_predictor(normalize.normalize_(
                    test_input[test_case]))))
            logging.debug('Ground Truth: ' + str(
                self._tag_encoder.inverse_transform(
                    np.array([self._y_test[test_case]]))))
            logging.debug('\n')
def predict(self, test_input, custom_input, test_case_count):
    """Predict tags and return a list of per-question prediction records.

    Parameters
    ----------
    test_input : iterable / indexable of question strings.
    custom_input : bool
        Truthy — predict every question in *test_input* (no ground truth).
        Falsy — draw *test_case_count* random samples from *test_input*
        and score predictions against ``self._y_test``; also sets
        ``self._accuracy``.
    test_case_count : int
        Number of random samples in the evaluation branch.

    Returns
    -------
    list of dict
        One record per question.  Custom branch: keys ``que_no``,
        ``questions``, ``predicted_tags``.  Evaluation branch adds
        ``ground_truth``.
    """
    normalize = Normalize()
    if custom_input:
        input_count = 0
        prediction_list = []
        for question in test_input:
            input_count += 1
            question_ = normalize.normalize(question)
            logging.debug('-' * (len(question) + 16))
            logging.debug('Test Case No.{}: {}'.format(
                input_count, str(question)))
            predicted_tag = self.tag_predictor(question_)
            logging.debug('Predicted Tags: {}'.format(predicted_tag))
            prediction_list.append({
                'que_no': input_count,
                'questions': str(question),
                'predicted_tags': predicted_tag
            })
            logging.debug('')
        return prediction_list
    else:
        test_idx = np.random.randint(len(test_input), size=test_case_count)
        logging.debug("Predicted Vs Ground Truth for {} sample(s)".format(
            test_case_count))
        logging.debug('-' * 50)
        logging.debug('')
        input_count = 0
        prediction_score = 0
        prediction_list = []
        for idx in test_idx:
            input_count += 1
            test_case = idx
            question = str(test_input[test_case])
            logging.debug('')
            logging.debug('-' * 100)
            logging.debug('Test Case No.{}:'.format(input_count))
            logging.debug("Question ID: {}".format(test_case))
            logging.debug('Question: {}'.format(question))
            predicted_tag = self.tag_predictor(
                normalize.normalize_(question))
            ground_truth = self._tag_encoder.inverse_transform(
                np.array([self._y_test[test_case]]))
            # A prediction counts as correct when any predicted tag
            # matches a ground-truth tag exactly, or is a prefix of one
            # (or vice versa).  BUG FIX: the original incremented
            # ``prediction_score`` once per *matching tag*, so a single
            # question could be counted several times and accuracy could
            # exceed 1.0.  Now the score is added at most once per case.
            score = 0
            ground_truth_ = [*ground_truth[0]]
            for tag_group in predicted_tag:
                for tag in [*tag_group]:
                    if tag in ground_truth_:
                        if len(tag) > 0:
                            score = 1
                    else:
                        for gt_tag in ground_truth_:
                            if (gt_tag.startswith(tag)
                                    or tag.startswith(gt_tag)
                                    ) and len(gt_tag) > 0:
                                score = 1
                                break
                    if score:
                        break
                if score:
                    break
            prediction_score += score
            prediction_list.append({
                'que_no': input_count,
                'questions': question,
                'ground_truth': str(ground_truth),
                'predicted_tags': str(predicted_tag)
            })
            # log the ground truth & prediction
            logging.debug('Predicted: ' + str(predicted_tag))
            logging.debug('Ground Truth: ' + str(ground_truth))
            logging.debug('\n')
        # Guard against division by zero when no samples were drawn.
        accuracy = prediction_score / input_count if input_count else 0.0
        self._accuracy = accuracy
        return prediction_list