def saveCommentEmotionData(model,best_words,app):
    """Classify every comment of one app and store the aggregated counts.

    ``app`` is a document providing the ``_id``, ``appname`` and ``catagory``
    keys. The word/location rows for the app are read from the collection
    named after its category, regrouped into one word list per ``location``,
    and each word list is run through ``predict``. The total number of
    comments plus the positive and negative counts are handed to
    ``savetoDB``. Nothing is done when ``emotion_comment`` already holds an
    entry for the app.
    """
    # Pause before doing any work, exactly as the function always has.
    time.sleep(1)
    appid, appname, cataname = app["_id"], app["appname"], app["catagory"]

    already_scored = MongoUtil.isExist("emotion_comment", {"appid": appid})
    if already_scored:
        print(appname+"已经存在了")
        print()
        return

    rows = MongoUtil.find(cataname, {"appid": appid})
    print(cataname, appname)

    # Rebuild each comment as the list of its words, keyed by location.
    comments = {}
    for row in rows:
        word_id = row["wordid"]
        location = row["location"]
        word_doc = MongoUtil.find_one("word_table", {"_id": word_id})
        comments.setdefault(location, []).append(word_doc["word"])

    pos_count = 0
    neg_count = 0
    for comment_words in comments.values():
        pred = predict(model, comment_words, best_words)
        emotion = judgeCommentEmotion(pred.prob('pos'), pred.prob('neg'))
        # judgeCommentEmotion's 1 is counted as positive, 2 as negative;
        # any other value is left out of both counters.
        if emotion == 1:
            pos_count += 1
        elif emotion == 2:
            neg_count += 1

    savetoDB(appid, len(comments), pos_count, neg_count)
def saveAppToDB(appinfo):
    """Insert ``appinfo`` into ``app_table`` unless it is already stored.

    An app is identified by its (category, name) pair. The stored document
    carries the category, name, url, description, apk and today's date
    (``YYYY-MM-DD``, local time). The install count is not stored.
    """
    record = {
        "catagory": appinfo.cata,
        "appname": appinfo.name,
        "url": appinfo.url,
        "descripe": appinfo.descripe,
        "apk": appinfo.apk,
        "date": time.strftime('%Y-%m-%d', time.localtime(time.time())),
    }

    identity = {"catagory": appinfo.cata, "appname": appinfo.name}
    if MongoUtil.isExist("app_table", identity):
        return

    MongoUtil.insert("app_table", record)
    # Echo the newly stored app.
    print(appinfo.cata + appinfo.name)
def saveAllComentEmotionData():
    """Run comment-emotion scoring for every app of every category.

    The category names are loaded from the JSON file at
    ``const.WANDOUJIA_CATA_JSON_FILE``; for each one the matching
    ``app_table`` documents are passed to ``saveCommentEmotionData``
    together with the model and word list returned by ``load_model``.
    A running counter is printed before each app.
    """
    model, best_words = load_model()

    # Close the category file as soon as it has been parsed.
    with open(const.WANDOUJIA_CATA_JSON_FILE) as cata_file:
        catas = json.load(cata_file)

    for cataname in catas:
        apps = MongoUtil.find("app_table", {"catagory": cataname})
        for code, app in enumerate(apps, start=1):
            print(code, end=" ")
            # The id and name must come from the current document; they
            # were previously referenced as unbound names.
            if MongoUtil.isExist("emotion_comment", {"appid": app["_id"]}):
                print(app["appname"] + "已经存在了")
                # NOTE(review): this abandons the rest of the category at the
                # first already-scored app. Kept as written; confirm that
                # `continue` was not intended.
                break
            saveCommentEmotionData(model, best_words, app)
def deliveryWords(appinfo,filename):
    """Segment a text file line by line and store word locations for an app.

    Each line of ``filename`` is cleaned of control characters, cut with
    ``jieba.cut(..., cut_all=False)`` and filtered (stop words, punctuation,
    blanks and pure digits are dropped). Every remaining word is ensured to
    exist in ``word_table`` and an ``{appid, wordid, location}`` document,
    where ``location`` is the 1-based line number, is inserted into the
    collection named after ``appinfo.cata``.

    Returns early, with a printed message, when the app is not yet in
    ``app_table`` or when the category collection already has rows for it.
    """
    print(appinfo.name)

    # Read everything up front and release the file handle immediately.
    with open(filename) as source:
        contents = [line.strip() for line in source]

    result = MongoUtil.find_one("app_table", {"catagory":appinfo.cata, "appname":appinfo.name})
    if result is None:
        print("\""+appinfo.cata+" "+appinfo.name+"\" 未存入数据库中,请先存储")
        return
    appid = result['_id']

    result = MongoUtil.find_one(appinfo.cata, {"appid":appid})
    if result is not None:
        print("\""+appinfo.cata+" "+appinfo.name+"\" 已经分词存入数据库,不必重复")
        return

    # Control characters other than tab, LF and CR; compiled once for all lines.
    garbage = re.compile('[\\x00-\\x08\\x0b-\\x0c\\x0e-\\x1f]')

    for line_num, line in enumerate(contents, start=1):
        # Short pause per line, kept from the original implementation.
        time.sleep(0.1)
        line = garbage.sub(' ', line)
        # cut_all=False: the non-"full" segmentation mode.
        for word in jieba.cut(line, cut_all=False):
            if (word in stopWords or word in punctuations
                    or word == '\n' or word == ' ' or word.isdigit()):
                continue

            post_word = {"word": word}
            if not MongoUtil.isExist("word_table", post_word):
                MongoUtil.insert("word_table", post_word)
            result = MongoUtil.find_one("word_table", post_word)
            wordid = result['_id']
            if wordid is None:
                # Diagnostic only: the location row is still written below.
                print(post_word)

            post_location = {
                "appid": appid,
                "wordid": wordid,
                "location": line_num,
            }
            MongoUtil.insert(appinfo.cata, post_location)
def saveRecommendApps(date):
    """Collect recommendation info for every app on ``date`` and upsert it.

    For each ``app_table`` document ``getRecommendInfo(app, date)`` is
    called; apps for which it returns ``None`` and apps that already have a
    ``recommend_table`` entry for ``date`` are skipped. Everything else is
    upserted into ``recommend_table`` in a single call at the end.
    """
    # Materialise the query result before the per-app work starts.
    all_apps = list(MongoUtil.find("app_table", {}))

    pending = []
    for app in all_apps:
        info = getRecommendInfo(app, date)
        if info is None:
            continue

        key = {"appid": app["_id"], "date": date}
        if MongoUtil.isExist("recommend_table", key):
            print(date + " " + app["appname"] + " 已经存在")
        else:
            print(app["appname"])
            pending.append(info)

    MongoUtil.upsert_mary("recommend_table", pending)
def delivery_words(appid,content):
    """Segment ``content`` and queue one ``{appid, wordid}`` entry per word.

    Control characters are replaced by spaces, the text is cut with
    ``jieba.cut(..., cut_all=False)``, and stop words, punctuation, blanks
    and pure digits are dropped. Each remaining word is ensured to exist in
    ``word_table``; its id is appended, together with ``appid``, to the
    shared ``posts`` collection defined outside this function.
    """
    # Replace control characters (everything below 0x20 except tab, LF, CR).
    cleaned = re.compile('[\\x00-\\x08\\x0b-\\x0c\\x0e-\\x1f]').sub(' ', content)

    # cut_all=False: the non-"full" segmentation mode.
    for token in jieba.cut(cleaned, cut_all=False):
        if (token in stopWords or token in punctuations
                or token == '\n' or token == ' ' or token.isdigit()):
            continue

        word_query = {"word": token}
        if not MongoUtil.isExist("word_table", word_query):
            MongoUtil.insert("word_table", word_query)
        stored = MongoUtil.find_one("word_table", word_query)
        wordid = stored['_id']
        if wordid == None:
            # Diagnostic only: the entry is still queued below.
            print(word_query)

        posts.append({"appid": appid, "wordid": wordid})
def scan_cata_app(cata):
    """Segment the description of every app in ``cata`` and store the words.

    Apps that already have rows in ``app_detail_descripe`` are skipped. For
    the others the description returned by ``read_descripe`` is passed to
    ``delivery_words``, which fills the shared ``posts`` collection; a
    non-empty ``posts`` is then upserted into ``app_detail_descripe``.
    Progress (a counter, the app name and the number of entries) is printed.
    """
    posts.clear()
    # Materialise the query result before the per-app work starts.
    apps = list(MongoUtil.find("app_table", {"catagory": cata}))

    for code, app in enumerate(apps, start=1):
        # `posts` is shared state, so start every app from an empty buffer.
        posts.clear()
        print(code, end=" ")
        print(app["appname"])

        if MongoUtil.isExist("app_detail_descripe", {"appid": app["_id"]}):
            continue

        content = read_descripe(cata, app["appname"])
        if content is not None:
            delivery_words(app["_id"], content)

        queued = len(posts)
        print(queued)
        print()
        if queued > 0:
            MongoUtil.upsert_mary("app_detail_descripe", posts)
def saveAppCapacityToDB(appid, date, capacity):
    """Store ``capacity`` for (``appid``, ``date``) unless an entry exists.

    The existence check in ``capacity_table`` uses only the app id and the
    date; the capacity is added to the document just before it is saved.
    """
    record = {"appid": appid, "date": date}
    if MongoUtil.isExist("capacity_table", record):
        return

    record["capacity"] = capacity
    MongoUtil.save("capacity_table", record)