Exemplo n.º 1
0
def index_one(k, item):
    """
    Add a single entry to the search index.

    Args:
        k: Value stored in the entry's ``path`` field.
        item: Mapping that provides ``kg`` (an iterable of strings, joined
            with commas to form the title) and ``sentence`` (the content).
    """
    # The searcher is created first, before ``item`` is read.
    searcher = tkitSearch.Search()
    entry = {
        'title': ",".join(item.get("kg")),
        'content': item.get("sentence"),
        'path': k,
    }
    # ``add`` is given a list, here holding just this one entry.
    searcher.add([entry])
Exemplo n.º 2
0
def run_index_task():
    """
    Index every eligible record yielded by ``kg.recheck_all()``.

    A record is eligible when its ``label`` is in ``[1, 2]`` and its
    ``state`` is either missing or in ``['1', '2']``. The number of records
    indexed so far is printed before every 1000th addition (starting at 0).
    """
    searcher = tkitSearch.Search()
    wanted_labels = [1, 2]
    wanted_states = ['1', '2']
    indexed = 0
    for k, item in kg.recheck_all():
        state = item.get("state")
        # Skip records whose state is set to something outside the list.
        if not (state is None or state in wanted_states):
            continue
        # The label must match regardless of the state.
        if item.get("label") not in wanted_labels:
            continue

        if indexed % 1000 == 0:
            print(indexed)
        indexed += 1

        entry = {
            'title': ",".join(item.get("kg")),
            'content': item.get("sentence"),
            'path': k,
        }
        searcher.add([entry])
Exemplo n.º 3
0
def kg_list():
    """
    Render a page of ``kg_mark`` records selected by query-string filters.

    Query-string parameters (read from ``request.args``):
        keyword: when non-empty, ``kg: keyword`` is added to the query filter.
        label: ``"all"`` (no label filter) or an integer label; defaults
            to 2 when missing or empty.
        state: value used for the ``state`` filter; defaults to ``"2"``.
        check: when the parameter is present (even as an empty string) the
            filter is ``check: True``; when absent it is ``check: None``.
        limit: maximum number of records fetched; defaults to 20.
        start, type: read but not used by the query.

    Side effects:
        Without a keyword, every fetched record is scored with ``pre`` and
        written back through ``kg.mark_sentence`` with ``check=True`` and
        ``state='2'``. With a keyword, the same write-back happens (with
        fixed ``pre``/``ai`` values) for records that pass the in-loop
        filter.

    Returns:
        The rendered ``list.html`` template when at least one record was
        collected, otherwise the string ``"没有数据"``.

    Raises:
        ValueError: if ``label`` or ``limit`` is not ``"all"`` (label only)
            or an integer string.

    NOTE: the template receives ``**locals()``, so every local name in this
    function is part of the template contract. Seemingly unused locals are
    kept on purpose; do not rename or remove them without checking
    ``list.html``.
    """
    tt = tkitText.Text()
    i = 0  # number of records appended to ``items``
    items = []

    ss = tkitSearch.Search()
    keyword = request.args.get('keyword')
    start = request.args.get('start')
    label = request.args.get('label')
    tp = request.args.get('type')
    state = request.args.get('state')
    check = request.args.get('check')
    # Any supplied value (including '') switches the filter to True; an
    # absent parameter leaves ``check`` as None.
    if check is not None:
        check = True
    limit = request.args.get('limit')
    if limit is None:
        limit = 20
    print("start", start)

    print(state)
    if state is None:
        state = "2"
    if not label:
        label = 2

    if not keyword:
        print("no kw")
        jump = ["目", "是", '市镇']
        q = {'check': check, "state": state}
        # "all" means "do not filter by label"; int("all") would raise.
        if label != "all":
            q['label'] = int(label)
        print('q', q)
        for item in DB.kg_mark.find(q).limit(int(limit)):
            k = item['_id']
            print(item)
            print('选择', item)
            # Attach the model prediction to the record.
            p, pr = pre(item)
            item['pre'] = pr
            item['ai'] = p
            # Persist progress: the record is marked as checked.
            item['check'] = True
            item['state'] = '2'
            kg.mark_sentence(k, item)

            # Wrap each kg term in a numbered <span> for highlighting.
            # NOTE(review): the terms and sentence are inserted into HTML
            # unescaped — confirm the stored data is trusted.
            s = item['sentence']
            # A separate index name keeps the running counter ``i`` intact.
            for idx, w in enumerate(item['kg']):
                s = s.replace(
                    w, "<span class='kg_" + str(idx) + "'>" + w + "</span>")
            item['sentence_mark'] = s

            if label == "all" or item.get('label') == int(label):
                items.append((k, item))
                i = i + 1
                list_start = k

    else:
        q = {'check': check, "kg": keyword, "state": state}
        # "all" means "do not filter by label"; int("all") would raise.
        if label != "all":
            q['label'] = int(label)
        print('q', q)
        for item in DB.kg_mark.find(q).limit(int(limit)):
            k = item['_id']
            if (item.get('kg') is None or item.get('state') != state
                    or item.get('check') != check):
                continue

            # Prediction is skipped in keyword mode; fixed values are used.
            item['pre'] = [(0, 1), (0, 1)]
            item['ai'] = 2
            # Persist progress: the record is marked as checked.
            item['check'] = True
            item['state'] = '2'
            print("保存数据", item)
            kg.mark_sentence(k, item)

            # Wrap each kg term in a numbered <span> for highlighting.
            # NOTE(review): the terms and sentence are inserted into HTML
            # unescaped — confirm the stored data is trusted.
            s = item['sentence']
            # A separate index name keeps the running counter ``i`` intact.
            for idx, w in enumerate(item['kg']):
                s = s.replace(
                    w, "<span class='kg_" + str(idx) + "'>" + w + "</span>")
            item['sentence_mark'] = s

            if (label == "all" or item.get('label') == label
                    or item.get('label') == int(label)):
                items.append((k, item))
                i = i + 1

    # Drop the searcher before rendering and force a collection pass.
    del ss
    gc.collect()

    if not items:
        return "没有数据"

    # NOTE(review): if DB is a PyMongo database, Cursor.count() was removed
    # in PyMongo 4 (count_documents(q) is the replacement) — confirm the
    # installed version before upgrading.
    q = {'check': True, "state": '2'}
    checked = DB.kg_mark.find(q).count()
    q = {'check': None, "state": state}
    uncheck = DB.kg_mark.find(q).count()
    return render_template("list.html", **locals())
Exemplo n.º 4
0
 def __init__(self):
     """Call ``mkdir`` for ``../tdata``, then open the LDB store and the searcher.

     Sets ``self.tdb`` to the ``tkitDb.LDB`` opened at ``../tdata/lv.db`` and
     ``self.ss`` to a new ``tkitSearch.Search``.
     """
     # The data directory is handled before the database file inside it.
     file_helper = tkitFile.File()
     file_helper.mkdir("../tdata")

     self.tdb = tkitDb.LDB(path="../tdata/lv.db")
     self.ss = tkitSearch.Search()