Пример #1
0
 # Look up the title of the current post and fetch its stored content from
 # the wp_posts table.
 postTitle = postDict[postID]
 cursor.execute(
     "select post_content from wp_posts where ID=" + str(postID) + ";")
 data = cursor.fetchone()
 postContent = data[0]

 # Remove every <div class="mw-highlight mw-content-ltr" dir="ltr">...</div>
 # section (non-greedy, spanning newlines) before inspecting the content.
 mode = r'<div class="mw-highlight mw-content-ltr" dir="ltr">([\S\s]*?)</div>'
 postContent = re.sub(mode, "", postContent)

 if postContent != "":
     # Reduce the remaining markup to its text and remember it per post.
     postContent = PyQuery(postContent).text()
     postIDToPostContentDict[postID] = postContent
     for wikiEntryID, wikiEntryTitle in wikiEntryDict.iteritems():
         if postContent.count(wikiEntryTitle) == 0:
             continue
         # The title occurs in the text: record the link in both directions.
         count += 1
         postIDToWikiEntryIDDict.setdefault(postID, []).append(wikiEntryID)
         wikiEntryIDToPostIDDict.setdefault(wikiEntryID, []).append(postID)
 else:
     # Nothing is left after stripping: drop the post and count the removal.
     del postDict[postID]
     delete += 1

 if postID in postIDToPostContentDict:
     # Keep track of the largest number of wiki entries linked to one post.
     if postID in postIDToWikiEntryIDDict:
         maxEntry = max(maxEntry, len(postIDToWikiEntryIDDict[postID]))
     # Segment the text with jieba (cut_all=False) and materialise the result.
     seg_list_generator = jieba.cut(postContent, cut_all=False)
     seg_list = list(seg_list_generator)