import traceback
from collections import namedtuple, defaultdict
from itertools import count as it_count

from log.Logger import Logger
from request_type.Parser import StopWords, Parser
from strategy.phrase_finder import PhraseFinder

logger = Logger()
phrase_finder = PhraseFinder()


class JSONExportParser(Parser):
    def parse(self):
        """Parse the JSON FAQ export and collect the maps the caller needs."""
        try:
            response = dict()
            self.faq_payload = self.read_file('json')
            self.print_verbose('pre processing input data ...')
            stop_tokens = self.get_stopwords_for_json()
            # Primary questions plus their alternate phrasings.
            questions_map, ques_to_altq_map = self.create_question_maps()
            response['question_map'] = questions_map
            response['altq_map'] = ques_to_altq_map
            response['stop_words'] = stop_tokens
            # Merge freshly generated synonyms into the user-supplied synonym file.
            response['graph_synonyms'] = self.update_generated_synonyms(
                self.args['syn_file_path'], self.get_graph_level_synonyms())
            return response
        except Exception:
            # Log and surface the full traceback; parse() returns None on failure.
            error_msg = traceback.format_exc()
            logger.error(error_msg)
            self.print_verbose(error_msg)
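# A minimal sketch of the dictionary shape parse() returns on success. The
# key names come from the code above; all values shown here are illustrative
# assumptions, since the helpers (create_question_maps, get_stopwords_for_json,
# update_generated_synonyms, ...) are defined elsewhere in the codebase.
example_response = {
    'question_map': {'How do I reset my password?': {'answer': '...'}},
    'altq_map': {'How do I reset my password?': ['reset password help']},
    'stop_words': ['a', 'an', 'the'],
    'graph_synonyms': {'password': ['passcode', 'credentials']},
}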
import copy
import re
import traceback
from collections import defaultdict

from tqdm import tqdm

from analyzer.kg_export.language.Lemmatize import Lemmatizer
from common import nlp, BOT_NAME
from log.Logger import Logger
from strategy.phrase_finder import PhraseFinder

logger = Logger()
phrase_finder_obj = PhraseFinder()
lemma = Lemmatizer()


class GramBasedGenerator(object):
    def __init__(self):
        pass

    @staticmethod
    def _filter_substrings(node_names):
        """Drop any name that is a substring of another name in the list."""
        new_node_names = copy.deepcopy(node_names)
        for node_1 in node_names:
            node_1_stripped = node_1.strip()
            for node_2 in node_names:
                node_2_stripped = node_2.strip()
                try:
                    if node_1_stripped != node_2_stripped:
                        if node_2_stripped in node_1_stripped:
                            new_node_names.remove(node_2)
                except Exception:
                    # The source is truncated here. Ignoring the error and
                    # continuing is the assumed intent: remove() raises
                    # ValueError once node_2 has already been dropped by an
                    # earlier iteration.
                    pass
        return new_node_names  # assumed return value; the original file cuts off above
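# A self-contained sketch of what _filter_substrings does, assuming the
# completion above matches the original intent. filter_substrings is a
# hypothetical standalone copy of the method so the example runs without
# the project's imports.
import copy

def filter_substrings(node_names):
    new_node_names = copy.deepcopy(node_names)
    for node_1 in node_names:
        for node_2 in node_names:
            if node_1.strip() != node_2.strip() and node_2.strip() in node_1.strip():
                try:
                    new_node_names.remove(node_2)
                except ValueError:
                    pass  # node_2 was already removed via another superstring
    return new_node_names

# 'credit card' is contained in 'credit card payment', so it is filtered out;
# comparisons use stripped copies, but surviving entries keep their whitespace.
print(filter_substrings(['credit card', 'credit card payment', ' loan ']))
# -> ['credit card payment', ' loan ']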