Exemplo n.º 1
0
    def search_mdx_entry(self):
        """Look up ``self.query`` in ``self.mdx`` and wrap the hits as mdxentry objects.

        The raw lookup result is kept on ``self.result_list``. Each raw row is
        indexed as 0: start, 1: end, 2: r_p1, 3: r_p2, 4: entry, 5: record.

        While iterating, ``self.f_pk``, ``self.f_p1`` and ``self.f_p2`` are
        updated and ``self.cmp`` is cleared before every call to
        ``self.substitute_record``.

        Returns:
            A list of mdxentry objects whose ``mdx_record`` is non-empty and
            does not begin with ``@@@LINK``.
        """
        hits = self.mdx.look_up(self.query)
        self.result_list = hits

        self.f_pk = self.dic.pk
        entries = []
        for hit in hits:
            self.f_p1 = hit[2]
            self.f_p2 = hit[3]
            self.cmp.clear()
            text = self.substitute_record(hit[5])
            if text == '':
                continue
            # NOTE(review): self.f_p2 is probably not the right value here;
            # the row's own r_p1/r_p2 may need to be carried in result_list.
            entries.append(
                mdxentry(self.dic_name, hit[4], text, self.prior,
                         self.dic.pk, self.f_pk, self.f_p1, self.f_p2))

        # Drop empty records and unresolved redirects. Example: in English
        # Wikipedia part 3, "back substitution" resolves to
        # "@@@LINK=Triangular matrixForward and back substitution", whose
        # target entry does not exist, so nothing is returned for it.
        return [
            entry for entry in entries
            if entry.mdx_record != ''
            and not entry.mdx_record.startswith('@@@LINK')
        ]
Exemplo n.º 2
0
def lemmatize_func(query, record_list, is_en):
    """Append a "base form guess" entry for *query* to *record_list*.

    When *is_en* is truthy, ``lemmatize_word(query)`` is consulted. If it
    yields any candidates, one built-in mdxentry is appended whose HTML lists
    every candidate (``w[1]`` shown as a badge, ``w[0]`` as an ``entry://``
    link). Otherwise *record_list* is left untouched.

    TODO: make this configurable (always guess, guess only when the query
    has no results, or never guess); the second option is the recommended one.

    Returns:
        The same *record_list* object that was passed in.
    """
    if not is_en:
        return record_list

    candidates = lemmatize_word(query)
    if len(candidates) == 0:
        return record_list

    html_parts = ['<div>' + query + '原形推测:</div>']
    html_parts.extend(
        '<div><span class="badge badge-pill badge-light">' + cand[1] +
        '</span><a href="entry://' + cand[0] + '">' + cand[0] + '</a></div>'
        for cand in candidates)

    # The guess is always added as its own built-in entry.
    record_list.append(
        mdxentry(builtin_dic_name, '', ''.join(html_parts), 0, -1, -1, -1,
                 -1))
    return record_list
Exemplo n.º 3
0
def extract_bultin_dic_all(r_list):
    """Combine several built-in dictionary results into one mdxentry.

    For every item in *r_list* the content list from
    ``get_mdict_content(item)`` is joined into one HTML chunk; an ``<hr />``
    is appended to every chunk except the last. The chunks are prefixed with
    ``builtin_dic_prefix`` and the item titles (``mdict_entry``) are joined
    with slashes to form the displayed entry name.

    Returns:
        A single built-in mdxentry, or ``None`` when *r_list* is empty.
    """
    total = len(r_list)
    html_parts = [builtin_dic_prefix]
    titles = []

    for position, item in enumerate(r_list, start=1):
        titles.append(item.mdict_entry)

        content = get_mdict_content(item)
        if position < total:
            # Separator between consecutive results, not after the last one.
            content.append('<hr />')
        html_parts.append(''.join(content))

    if total == 0:
        return None

    # Displayed entry names are separated by slashes.
    return mdxentry(builtin_dic_name, '/'.join(titles), ''.join(html_parts),
                    0, -1, -1, -1, -1)
Exemplo n.º 4
0
def key_spellcheck(query, record_list, is_en):
    """Append a spell-check suggestion entry for *query* to *record_list*.

    Nothing happens unless *is_en* is truthy (per the original note, spell
    checking is meant for words made only of letters, hyphens, apostrophes
    and spaces). Suggestions come from ``spellcheck(query)``; any suggestion
    equal to the lower-cased query is discarded and at most nine are shown,
    each as an ``entry://`` link inside one built-in mdxentry.

    TODO: make this configurable (always check, check only when the query
    has no results, or never check); the second option is the recommended one.

    Returns:
        The same *record_list* object that was passed in.
    """
    if not is_en:
        return record_list

    suggestions = spellcheck(query)
    # Drop suggestions identical to the lower-cased query (edits the list
    # in place).
    suggestions[:] = [s for s in suggestions if s != query.lower()]
    if len(suggestions) == 0:
        return record_list

    html_parts = ['<div>' + query + '拼写检查:</div>']
    html_parts.extend(
        '<div><a href="entry://' + s + '">' + s + '</a></div>'
        for s in suggestions[:9])

    record_list.append(
        mdxentry(builtin_dic_name, query, ''.join(html_parts), 20, -1, -1,
                 -1, -1))
    return record_list
Exemplo n.º 5
0
def merge_record(query, record_list):
    """Collapse many records from the same dictionary into one entry.

    Thresholds are read from configuration: ``merge_entry_num`` (minimum
    number of records) and ``merge_entry_max_length`` (record length limit).
    A dictionary qualifies when at least ``merge_entry_num`` of its records
    are shorter than ``merge_entry_max_length``. Once a dictionary qualifies,
    all of its records are removed from *record_list* (including any at or
    above the length limit) and replaced by one mdxentry, appended at the
    end, whose text is their concatenation in original order and whose
    priority is that of the first of them.

    Background from the original notes: merging targets dictionaries such as
    the National Academy for Educational Research bilingual glossary, which
    returns 27 entries for "variation". The length limit exists because
    merging very long entries (e.g. every "a" entry of an
    English-Japanese-Chinese dictionary) makes the iframe freeze for a long
    time on a desktop machine when it expands.

    Returns:
        The same *record_list* object, modified in place.
    """
    min_count = get_config_con('merge_entry_num')
    max_length = get_config_con('merge_entry_max_length')

    # Count, per dictionary, the records short enough to be worth merging.
    # The length test is a provisional criterion.
    short_counts = {}
    for item in record_list:
        name = item.mdx_name
        if len(item.mdx_record) < max_length:
            short_counts[name] = short_counts.get(name, 0) + 1

    to_merge = [
        name for name, count in short_counts.items() if count >= min_count
    ]

    merged_entries = []
    for name in to_merge:
        kept = []
        grouped = []
        for item in record_list:
            (grouped if item.mdx_name == name else kept).append(item)
        record_list[:] = kept

        text = ''.join(item.mdx_record for item in grouped)
        priority = grouped[0].mdx_pror if grouped else 1
        merged_entries.append(
            mdxentry(name, query, text, priority, -1, -1, -1, -1))

    record_list.extend(merged_entries)
    return record_list