def words_split():
    """Queue word-splitting work for every job id not yet stored as a Job.

    Steps:
      1. Collect the ``jobId`` values of all existing ``Job`` rows
         (the original comments describe these as the ids held in MySQL).
      2. Fetch every job id from ``hbase_tool.getalljobid()`` and keep only
         those not already present in step 1.
      3. Refresh the module-level ``keywords`` cache (keyword string ->
         ``Keyword`` instance) from the database.
      4. For each new job id, look up its keyword and job info, create and
         cache a ``Keyword`` row when the keyword is not cached yet, and
         submit ``thread_deal(job_info, keyword)`` to ``executor``.

    Returns:
        None. If fetching the ids raises ``BrokenPipeError`` the error text
        is printed and the function returns without doing any work.
    """
    global keywords

    # Ids that already exist as Job rows; flat=True yields bare values
    # instead of 1-tuples.
    known_ids = set(Job.objects.values_list('jobId', flat=True))

    # Ids known to HBase.
    try:
        hbase_ids = hbase_tool.getalljobid()
    except BrokenPipeError as e:
        print(e.strerror)
        return

    # TODO (carried over from the original, "needs changing"): an alternative
    # that processes every HBase id (`newset = newidset`) was left commented
    # out next to this line.
    # NOTE(review): the set difference assumes getalljobid() returns a set
    # -- confirm against hbase_tool.
    new_ids = hbase_ids - known_ids
    print("start split words")

    # Cache the existing Keyword objects, keyed by their keyword string.
    for kw in Keyword.objects.all():
        keywords[kw.keyword] = kw

    for job_id in new_ids:
        keyword = hbase_tool.getkeyword_byjobid(job_id)
        job_info = str(hbase_tool.getjobinfo_byjobid(job_id)).strip()

        # Create the keyword if it is not in the cache yet.
        if keywords.get(keyword) is None:
            print("new keyword : ", keyword)
            new_kw = Keyword(keyword=keyword)
            new_kw.save()
            # Cache the instance that was just saved. Re-fetching it with a
            # `keyword__contains` lookup is a substring match and raises
            # MultipleObjectsReturned as soon as another stored keyword
            # contains this one (e.g. "java" vs "javascript").
            keywords[keyword] = new_kw

        executor.submit(thread_deal, job_info, keyword)