示例#1
0
def words_split():
    """Queue word-splitting work for jobs present in HBase but not in MySQL.

    The job ids already stored through the ``Job`` model are subtracted from
    the ids returned by ``hbase_tool.getalljobid()``. For every remaining id
    the job's keyword and info text are read from HBase, a ``Keyword`` row is
    created when the keyword is missing from the module-level ``keywords``
    cache, and ``thread_deal(text, keyword)`` is submitted to ``executor``.

    Returns:
        None. When fetching the HBase ids raises ``BrokenPipeError`` the
        error text is printed and no job is processed.
    """
    global keywords

    # Job ids already persisted in MySQL (values_list yields 1-tuples).
    oldidset = {row[0] for row in Job.objects.values_list('jobId')}

    # Job ids available in HBase.
    try:
        newidset = hbase_tool.getalljobid()
    except BrokenPipeError as e:
        print(e.strerror)
        return

    # TODO: revisit this filter; the original note kept `newset = newidset`
    # (no filtering at all) as the alternative.
    # NOTE(review): the subtraction assumes getalljobid() returns a set.
    newset = newidset - oldidset

    print("start split words")

    # Warm the cache with every Keyword row, keyed by its text.
    for kw in Keyword.objects.all():
        keywords[kw.keyword] = kw

    for job_id in newset:
        keyword = hbase_tool.getkeyword_byjobid(job_id)
        text = str(hbase_tool.getjobinfo_byjobid(job_id)).strip()

        # Create and cache the keyword the first time it is seen.
        if keywords.get(keyword) is None:
            print("new keyword : ", keyword)
            newkeyword = Keyword(keyword=keyword)
            newkeyword.save()
            # Cache the saved instance itself. Re-querying with a substring
            # lookup (keyword__contains) raises MultipleObjectsReturned as
            # soon as another stored keyword contains this one.
            keywords[keyword] = newkeyword
        executor.submit(thread_deal, text, keyword)