示例#1
0
 def on_modified(self, event):
     """Reload dictionaries when a watched dictionary/config file changes.

     ``event.key`` is indexed as ``(event_type, path, flag)``. Nothing is
     done unless ``event_type`` is ``'modified'`` and ``flag`` is ``False``
     (presumably the "is directory" flag of the file-system event -- TODO
     confirm against the event source).

     Args:
         event: file-system event object exposing a ``key`` sequence.
     """
     if event.key[0] != 'modified':
         return
     changed_path = event.key[1]

     # /dict/phrase (or phrase_local) was modified: reload the dictionary.
     if (changed_path.split('/')[-1] in ('phrase', 'phrase_local')
             and event.key[2] is False):
         # Hold the lock via a context manager so it is always released,
         # even when reload_dict() raises; a bare acquire()/release() pair
         # would leave the lock held and block every later reload.
         with lock:
             collector_service.reload_dict()

     # A hanlp file (e.g. /hanlp.properties) was modified: reload the
     # HanLP segmentor's custom dictionary.
     if 'hanlp' in changed_path and event.key[2] is False:
         hanlp_segmentor.HanlpSegmentor().reload_custom_dictionry()
示例#2
0
 def __init__(self):
     """Load the IDF table from ``./idf_clean.txt`` and create the segmentor.

     Each non-blank line is split on ``","``; the first field is the term
     and the second is parsed as its float weight. When a term appears more
     than once, the first weight read is kept.

     Raises:
         OSError: if ``./idf_clean.txt`` cannot be opened.
         IndexError: if a non-blank line has no ``","`` separator.
         ValueError: if the second field is not a valid float.
     """
     # get idf dict
     self.dit = {}
     with open("./idf_clean.txt", "r") as f:
         for line in f:
             if not line.strip():
                 # Skip blank lines (e.g. a trailing empty line) instead of
                 # failing with IndexError on the missing weight field.
                 continue
             fields = line.strip("\n").split(",")
             # setdefault keeps the first weight seen for a repeated term.
             self.dit.setdefault(fields[0], float(fields[1]))
     # The ``with`` block has already closed the file; no explicit close().
     # NOTE: the attribute name ``sgementor`` (sic) is kept unchanged because
     # code outside this block may read it.
     self.sgementor = hanlp_segmentor.HanlpSegmentor()
示例#3
0
    def __init__(self):
        """Resolve the CRF file locations from the config and build the segmentor.

        Every path is the directory containing this source file concatenated
        with the matching option read from the ``crf`` section of
        ``CONFIG_FILE``. The prediction output path is the test file path
        with ``_predict`` appended.
        """
        this_file = inspect.getsourcefile(lambda: 0)
        base_dir = os.path.dirname(os.path.abspath(this_file))
        parser = configparser.ConfigParser()
        parser.read(CONFIG_FILE)

        def crf_path(option):
            # Plain string concatenation: the configured value is appended
            # to the base directory as-is.
            return base_dir + parser.get("crf", option)

        # Model and template locations
        self.model_file_path = crf_path("model")
        self.template_file_path = crf_path("template")

        # Train / test data and the derived prediction output
        self.train_file_path = crf_path("train_file")
        self.test_file_path = crf_path("test_file")
        self.predict_file_path = self.test_file_path + '_predict'

        self.segmentor = hanlp_segmentor.HanlpSegmentor()
示例#4
0
    def __init__(self):
        """Create the helper objects, read the config and load the phrase dictionary.

        ``phrase_dict_path`` becomes a list: the ``phrase`` option of the
        ``dictionary`` section is split on ``';'``, each entry is stripped of
        surrounding whitespace and prefixed with the directory of this
        source file. Finally ``self.reload_dict()`` is called.
        """
        self.term_rank = term_ranking.TermRank()
        self.segmentor = hanlp_segmentor.HanlpSegmentor()
        self.ahocorasick = ac_search.ACSearch()

        this_file = inspect.getsourcefile(lambda: 0)
        base_dir = os.path.dirname(os.path.abspath(this_file))
        settings = configparser.ConfigParser()
        settings.read(CONFIG_FILE)

        self.te = abc_time.ABCYear()

        # One absolute-style path per ';'-separated entry in the config.
        phrase_paths = []
        for entry in settings.get("dictionary", "phrase").split(';'):
            phrase_paths.append(base_dir + entry.strip())
        self.phrase_dict_path = phrase_paths

        self.domain = settings.get("domain", "domain")
        self.phrase_dict = {}
        self.reload_dict()
示例#5
0
    def __init__(self):
        """Create the helper objects, load the configured dictionaries and phrases.

        Reads ``CONFIG_FILE`` and, using paths prefixed with the directory of
        this source file:

        * adds the ``norm_useless`` dictionary to the ``ACSearch`` instance
          and starts it;
        * reads the ``weight_drop`` file into a set, one entry per line with
          the trailing newline removed;
        * stores the ``phrase`` dictionary path and calls
          ``self.reload_dict()``.
        """
        self.term_rank = term_ranking.TermRank()
        self.segmentor = hanlp_segmentor.HanlpSegmentor()
        self.ahocorasick = ac_search.ACSearch()

        this_file = inspect.getsourcefile(lambda: 0)
        base_dir = os.path.dirname(os.path.abspath(this_file))
        settings = configparser.ConfigParser()
        settings.read(CONFIG_FILE)

        self.ahocorasick.add_dict(
            base_dir + settings.get("dictionary", "norm_useless"))
        self.ahocorasick.start()
        self.te = abc_time.ABCYear()

        weight_drop_file = base_dir + settings.get("dictionary", "weight_drop")
        self.weight_drop = set()
        with open(weight_drop_file) as handle:
            # Only the newline is stripped; other whitespace is kept.
            self.weight_drop.update(row.strip('\n') for row in handle)

        self.phrase_dict_path = base_dir + settings.get("dictionary", "phrase")
        self.phrase_dict = {}
        self.reload_dict()
示例#6
0
 def __init__(self, sentences_path):
     """Create a HanLP segmentor and remember where the sentences live.

     Args:
         sentences_path: stored unchanged on ``self.sentences_path``.
     """
     segmentor = hanlp_segmentor.HanlpSegmentor()
     self.segmentor, self.sentences_path = segmentor, sentences_path
 def __init__(self):
     """Create the sequence tagger and the HanLP segmentor used by this object."""
     # Build the collaborators in the original order (tagger first), then
     # bind them to the instance.
     tagger = SequenceTag()
     segmentor = hanlp_segmentor.HanlpSegmentor()
     self.get_tag_obj = tagger
     self.segmentor = segmentor