示例#1
0
def getDowloadCapacity(appname, cataname=""):
    """Collect the stored download capacities of one app, keyed by date.

    The app is looked up in ``app_table`` by name, additionally filtered by
    category when ``cataname`` is not the empty string. Every
    ``capacity_table`` record whose ``appid`` matches contributes one
    ``date -> capacity`` entry.

    Args:
        appname: Name of the app to look up.
        cataname: Optional category name; ``""`` disables the category filter.

    Returns:
        A dict mapping each record's ``date`` to its capacity, or ``None``
        (after printing a notice) when no matching app exists. A capacity
        made up only of digits is kept as stored; anything else is passed
        through ``install2num``.
    """
    if cataname == "":
        query = {"appname": appname}
    else:
        query = {"catagory": cataname, "appname": appname}

    app = MongoUtil.find_one("app_table", query)
    if app is None:
        print(cataname + appname + "不存在")
        return None

    app_capacity = {}
    for item in MongoUtil.find("capacity_table", {"appid": app["_id"]}):
        date = item["date"]
        capacity = item["capacity"]
        # Non-numeric capacities are normalised by install2num
        # (assumes "capacity" is stored as a string -- TODO confirm).
        if not capacity.isdigit():
            capacity = install2num(capacity)
        app_capacity[date] = capacity
    return app_capacity
示例#2
0
def saveCommentEmotionData(model,best_words,app):
    """Classify each comment of one app and store the positive/negative tally.

    Sleeps for one second, then returns early (after printing a notice) when
    ``emotion_comment`` already holds a record for the app. Otherwise the
    word records in the app's category collection are grouped by their
    ``location`` field, each group is classified with ``predict`` and
    ``judgeCommentEmotion``, and the totals are handed to ``savetoDB``.

    Args:
        model: Classifier forwarded to ``predict``.
        best_words: Feature words forwarded to ``predict``.
        app: App record providing ``_id``, ``appname`` and ``catagory``.
    """
    time.sleep(1)
    appid = app["_id"]
    appname = app["appname"]
    cataname = app["catagory"]

    already_scored = MongoUtil.isExist("emotion_comment", {"appid": appid})
    if already_scored:
        print(appname+"已经存在了")
        print()
        return

    word_records = MongoUtil.find(cataname, {"appid": appid})
    print(cataname, appname)

    # Group the words by the location they were recorded at.
    comments = {}
    for record in word_records:
        word_id = record["wordid"]
        location = record["location"]
        word = MongoUtil.find_one("word_table", {"_id": word_id})["word"]
        comments.setdefault(location, []).append(word)

    pos_count = 0
    neg_count = 0
    for comment_words in comments.values():
        pred = predict(model, comment_words, best_words)
        emotion = judgeCommentEmotion(pred.prob('pos'), pred.prob('neg'))
        # 1 is tallied as positive, 2 as negative; other values are ignored.
        if emotion == 1:
            pos_count += 1
        elif emotion == 2:
            neg_count += 1

    savetoDB(appid, len(comments), pos_count, neg_count)
示例#3
0
def scanMostFastGrownApps(order=-1,limit=50,capacity_limit = 10000,date = "2017-01-23"):
    """Print the top-ranked apps by growth rate for one date.

    Records of ``capacity_rate_table`` for ``date`` are fetched sorted by
    ``incre_rate``. At most ``limit`` of them are inspected; each one whose
    ``capacity_table`` entry for the same date reaches ``capacity_limit`` is
    printed together with its ``incre_rate`` and ``wilson_lower_rate``.

    Args:
        order: Sort direction forwarded to ``MongoUtil.sort_with_values``.
        limit: Maximum number of ranked records to inspect.
        capacity_limit: Minimum ``capacity_num`` an app needs to be printed.
        date: Date value used to filter both tables.
    """
    results = MongoUtil.sort_with_values(
        "capacity_rate_table", {"date": date}, "incre_rate", order=order)

    inspected = 0
    for result in results:
        # Check the limit first so skipped records cannot bypass it.
        if inspected >= limit:
            break
        inspected += 1

        appid = result["appid"]
        capacityinfo = MongoUtil.find_one(
            "capacity_table", {"appid": appid, "date": date})
        if capacityinfo is None or capacityinfo["capacity_num"] < capacity_limit:
            continue

        appinfo = MongoUtil.find_one("app_table", {"_id": appid})
        if appinfo is None:
            # A rate record without an app record cannot be reported.
            continue

        appinfo["incre_rate"] = result["incre_rate"]
        appinfo["wilson_lower_rate"] = result["wilson_lower_rate"]
        print(appinfo)
        print()
示例#4
0
def getRecommendInfo(appinfo, date):
    """Assemble the recommendation record of one app for one date.

    Three lookups are required: the app's ``capacity_table`` and
    ``capacity_rate_table`` entries for ``date`` and its ``emotion_comment``
    entry. If any of them is missing the function returns ``None``.

    Args:
        appinfo: App record providing ``_id``, ``appname`` and ``catagory``.
        date: Date value used for the capacity and capacity-rate lookups.

    Returns:
        ``None`` when a required lookup finds nothing; otherwise a dict with
        the app's name, category, id, capacity, date, capacity rate, comment
        Wilson lower score, comment count and the weighted
        ``recommend_score``. If building the dict fails, a marker line is
        printed and the partially filled dict (without ``recommend_score``)
        is returned.
    """
    appid = appinfo["_id"]

    capacity_info = MongoUtil.find_one("capacity_table", {
        "appid": appid,
        "date": date
    })
    if capacity_info is None:
        return None

    capacity_rate_info = MongoUtil.find_one("capacity_rate_table", {
        "appid": appid,
        "date": date
    })
    if capacity_rate_info is None:
        return None

    comment_info = MongoUtil.find_one("emotion_comment", {"appid": appid})
    if comment_info is None:
        return None

    recommend_info = {}
    try:
        recommend_info["appname"] = appinfo["appname"]
        recommend_info["catagory"] = appinfo["catagory"]
        recommend_info["appid"] = appinfo["_id"]
        recommend_info["capacity"] = capacity_info["capacity_num"]
        recommend_info["date"] = date
        recommend_info["capacity_rate"] = capacity_rate_info["incre_rate"]
        recommend_info["comment_wilson_lower_score"] = comment_info[
            "wilson_lower_score"]
        recommend_info["comment_count"] = comment_info["comment_count"]
        # Weighted sum of the normalised signals plus a correction term.
        recommend_info["recommend_score"] = (
            getLastCapacityNormalization(recommend_info["capacity_rate"]) *
            last_capacity_rate_param +
            getCapacityNormalization(recommend_info["capacity"]) *
            capacity_param + getApplauseNormalization(
                recommend_info["comment_wilson_lower_score"]) * applause_param
            + getCommentCountNormalization(recommend_info["comment_count"]) *
            comment_count_param + correct(recommend_info))
    except Exception:
        # Best effort: report the app and fall through with the partial
        # record. Only Exception is caught so KeyboardInterrupt/SystemExit
        # still propagate, and .get() keeps the handler itself from raising
        # when "appname" was never stored.
        print("-->" + str(recommend_info.get("appname", "")))

    return recommend_info
示例#5
0
def scanMostCapacityApps(limit=50):
    """Print the apps returned by ``MongoUtil.capacity_find_most``.

    For every returned row the matching ``app_table`` record is fetched,
    extended with the row's ``capacity_num`` and ``date``, and printed
    followed by a blank line.

    Args:
        limit: Forwarded to ``MongoUtil.capacity_find_most``.
    """
    for row in MongoUtil.capacity_find_most(limit):
        record = row["value"]
        appid = record["appid"]
        appinfo = MongoUtil.find_one("app_table", {"_id": appid})
        for field in ("capacity_num", "date"):
            appinfo[field] = record[field]
        print(appinfo)
        print()
示例#6
0
def deliveryWords(appinfo,filename):
    """Segment an app's text file and store each word's location in MongoDB.

    The file is read line by line. Every kept word is registered in
    ``word_table`` if it is not there yet, and one record holding the app
    id, the word id and the 1-based line number is inserted into the
    collection named after the app's category. Nothing is stored when the
    app is missing from ``app_table`` or already has records in that
    collection.

    Args:
        appinfo: Object exposing ``name`` and ``cata`` attributes.
        filename: Path of the text file to segment.
    """
    print(appinfo.name)
    # Read through a context manager so the file handle is always closed.
    with open(filename) as source_file:
        contents = [line.strip() for line in source_file]

    result = MongoUtil.find_one("app_table", {"catagory":appinfo.cata, "appname":appinfo.name})
    if result is None:
        print("\""+appinfo.cata+" "+appinfo.name+"\" 未存入数据库中,请先存储")
        return
    appid = result['_id']

    # Any existing record means this app has already been segmented.
    if MongoUtil.find_one(appinfo.cata, {"appid":appid}) is not None:
        print("\""+appinfo.cata+" "+appinfo.name+"\" 已经分词存入数据库,不必重复")
        return

    # Compiled once: matches control characters other than \t, \n and \r.
    control_chars = re.compile('[\\x00-\\x08\\x0b-\\x0c\\x0e-\\x1f]')

    for line_num, line in enumerate(contents, start=1):
        time.sleep(0.1)
        # Replace stray control characters with spaces.
        line = control_chars.sub(' ', line)
        # cut_all=False selects jieba's precise mode (not full mode).
        for word in jieba.cut(line, cut_all=False):
            if (word in stopWords or word in punctuations or word == '\n'
                    or word == ' ' or word.isdigit()):
                continue

            post_word = {}
            post_word["word"] = word
            if not MongoUtil.isExist("word_table", post_word):
                MongoUtil.insert("word_table", post_word)

            result = MongoUtil.find_one("word_table", post_word)
            wordid = result['_id']
            if wordid is None:
                print(post_word)

            post_location = {
                "appid": appid,
                "wordid": wordid,
                "location": line_num,
            }
            MongoUtil.insert(appinfo.cata, post_location)
示例#7
0
    def __init__(self, appname, cataname=""):
        """Load one app and compute its word statistics.

        The app is looked up in ``app_table`` by name, additionally filtered
        by category when ``cataname`` is not the empty string. When it is
        found, ``worddict`` and ``wordcount`` are filled from
        ``frequencyscore()`` and, if at least 100 words were counted,
        ``tf_idfdict`` from ``tf_idf()``.

        When the app is not found, a notice is printed and the instance is
        left with an empty ``worddict``, a ``wordcount`` of 0 and
        ``tf_idfdict`` set to ``None``.

        Args:
            appname: Name of the app to analyse.
            cataname: Optional category name; ``""`` disables the filter.
        """
        self.tf_idfdict = None
        if cataname == "":
            query = {"appname": appname}
        else:
            query = {"catagory": cataname, "appname": appname}
        self.app = MongoUtil.find_one("app_table", query)

        if self.app is None:
            print("该app未存储在数据库,可能原因:查询不准确,未存储入数据库,数据未更新")
            # frequencyscore() indexes self.app, so it cannot run without a
            # record; initialise the statistics as empty and stop here.
            self.worddict, self.wordcount = {}, 0
            return
        print(self.app)

        self.worddict, self.wordcount = self.frequencyscore()
        if self.wordcount < 100:
            print("该app的评论数量过少,获取关键词将会不准确")
            return
        print("评论总数是:" + str(self.wordcount))
        self.tf_idfdict = self.tf_idf()
示例#8
0
def get_app_each_comment(appname,cataname =""):
    """Return the stored words of one app grouped by their location.

    The app is looked up in ``app_table`` by name, additionally filtered by
    category when ``cataname`` is not the empty string; the lookup result is
    printed. Word records are then read from the collection named after the
    app's category.

    Args:
        appname: Name of the app to look up.
        cataname: Optional category name; ``""`` disables the filter.

    Returns:
        A dict mapping each ``location`` value to the list of words recorded
        there, or ``None`` when the app is not found.
    """
    query = (
        {"appname": appname}
        if cataname == ""
        else {"catagory": cataname, "appname": appname}
    )
    app = MongoUtil.find_one("app_table", query)
    print(app)
    if app is None:
        return

    app_id = app["_id"]
    app_cata = app["catagory"]

    comments = {}
    for record in MongoUtil.find(app_cata, {"appid": app_id}):
        word_id = record["wordid"]
        location = record["location"]
        word = MongoUtil.find_one("word_table", {"_id": word_id})["word"]
        comments.setdefault(location, []).append(word)
    return comments
示例#9
0
 def frequencyscore(self):
     """Count how often each word occurs in this app's stored records.

     Reads every record of this app from the collection named after its
     category and resolves each record's ``wordid`` through ``word_table``.

     Returns:
         A ``(counts, total)`` tuple: a dict mapping each word to its number
         of occurrences, and the total number of records read.
     """
     counts = {}
     total = 0
     records = MongoUtil.find(self.app["catagory"], {"appid": self.app["_id"]})
     for record in records:
         entry = MongoUtil.find_one("word_table", {"_id": record["wordid"]})
         word = entry["word"]
         counts[word] = counts.get(word, 0) + 1
         total += 1
     return counts, total
示例#10
0
def scanMostPositiveApps(order=-1,limit=50):
    """Print apps ranked by the Wilson lower score of their comments.

    ``emotion_comment`` records are fetched sorted by ``wilson_lower_score``.
    For each one the matching ``app_table`` record is extended with the
    comment statistics and printed, followed by a blank line.

    Args:
        order: Sort direction forwarded to ``MongoUtil.sort``.
        limit: Maximum number of records, forwarded to ``MongoUtil.sort``.
    """
    copied_fields = (
        "comment_count",
        "pos_count",
        "neg_count",
        "applause_rate",
        "wilson_lower_score",
    )
    ranked = MongoUtil.sort("emotion_comment", "wilson_lower_score", order=order, limit=limit)
    for record in ranked:
        appid = record["appid"]
        appinfo = MongoUtil.find_one("app_table", {"_id": appid})
        for field in copied_fields:
            appinfo[field] = record[field]
        print(appinfo)
        print()
示例#11
0
    def tf_idf(self):
        """Compute a TF-IDF value for every word in ``self.worddict``.

        The term frequency is the word's count divided by ``self.wordcount``.
        The inverse document frequency is
        ``log(other_apps / (other_apps_with_word + 1))``, where both counts
        come from distinct ``appid`` values in the app's category collection
        and exclude this app itself. When the category holds no other app,
        the IDF is taken as 0.0 instead of evaluating ``log(0)``.

        Each word is printed with its count and TF-IDF value.

        Returns:
            A dict mapping each word to its TF-IDF value, or ``None`` (after
            printing a notice) when ``worddict`` is empty/unset or fewer
            than 100 words were counted.
        """
        if not self.worddict:
            print("请初始化词频统计")
            return None
        if self.wordcount < 100:
            print("该app的评论数量过少,获取关键词将会不准确")
            return None

        category = self.app["catagory"]

        # Number of apps in the category, not counting this one; never
        # negative. Computed once because it does not depend on the word.
        docu_count = len(MongoUtil.distinct_count(category, "appid", value=None))
        docu_count = max(docu_count - 1, 0)

        tf_idfdict = {}
        for word, occurrences in self.worddict.items():
            result = MongoUtil.find_one("word_table", {"word": word})
            wordid = result["_id"]
            # Number of apps containing the word, not counting this one.
            include_count = len(
                MongoUtil.distinct_count(category,
                                         "appid",
                                         value={"wordid": wordid}))
            include_count -= 1

            if docu_count > 0:
                wordidf = float(math.log(docu_count / (include_count + 1)))
            else:
                # No other app to compare against: math.log(0) would raise
                # ValueError, so the word gets no IDF weight.
                wordidf = 0.0
            wordtf = float(occurrences / self.wordcount)
            tf_idfdict[word] = wordtf * wordidf

        for word, score in tf_idfdict.items():
            print(word + "    出现的次数:" + str(self.worddict[word]) +
                  "     tf-idf计算值:" + str(score))

        return tf_idfdict
示例#12
0
def delivery_words(appid,content):
    """Segment one piece of text and queue a record per kept word.

    Every kept word is registered in ``word_table`` if it is not there yet,
    and a dict holding ``appid`` and the word's id is appended to the
    module-level ``posts`` list.

    Args:
        appid: Id of the app the text belongs to.
        content: Text to segment.
    """
    # Replace control characters other than \t, \n and \r with spaces.
    content = re.compile('[\\x00-\\x08\\x0b-\\x0c\\x0e-\\x1f]').sub(' ', content)
    # cut_all=False selects jieba's precise mode (not full mode).
    for word in jieba.cut(content, cut_all=False):
        if (word in stopWords or word in punctuations or word == '\n'
                or word == ' ' or word.isdigit()):
            continue

        post_word = {}
        post_word["word"] = word
        if not MongoUtil.isExist("word_table", post_word):
            MongoUtil.insert("word_table", post_word)

        result = MongoUtil.find_one("word_table", post_word)
        wordid = result['_id']
        if wordid is None:
            print(post_word)

        posts.append({"appid": appid, "wordid": wordid})
示例#13
0
def getCataAppsInfo(filename):
    """Store the install capacity of every app listed under a directory.

    The category names are read from ``const.WANDOUJIA_CATA_JSON_FILE``. For
    each category, ``<filename>/<category>.json`` is loaded and every app in
    it is looked up in ``app_table``. Apps that are found have their
    ``install`` value saved through ``saveAppCapacityToDB``; the others are
    appended to the module-level ``app_not_exist`` list.

    Args:
        filename: Directory path holding the per-category JSON files; also
            passed to ``readInfoFile`` to obtain the date.
    """
    print(filename)
    date, _count = readInfoFile(filename)

    # Context managers close the JSON files as soon as they are parsed.
    with open(const.WANDOUJIA_CATA_JSON_FILE) as cata_file:
        catas = json.load(cata_file)

    for cataname in catas:
        cataname = cataname.strip()
        catafilename = filename + "/" + cataname + ".json"
        with open(catafilename) as apps_file:
            apps = json.load(apps_file)

        for app in apps.items():
            raw_name, details = app
            # Slashes in the name are replaced by spaces before the lookup.
            name = raw_name.strip().replace("/", " ")
            time.sleep(0.2)
            post = {"catagory": cataname, "appname": name}
            result = MongoUtil.find_one("app_table", post)
            print("存入的app" + name)
            if result is None:
                print(cataname + "->" + name + "   ->该app未存入数据库")
                app_not_exist.append(app)
            else:
                saveAppCapacityToDB(result["_id"], date, details["install"])
示例#14
0
def deleteAppDieveryWord(cataname,appname):
    """Remove the stored word records of one app from its category collection.

    The app is looked up in ``app_table`` by category and name, so that a
    same-named app in another category is never picked. When no such app
    exists a notice is printed and nothing is removed.

    Args:
        cataname: Category name; also the name of the collection to clean.
        appname: Name of the app whose word records are removed.
    """
    app = MongoUtil.find_one("app_table", {"catagory": cataname, "appname": appname})
    if app is None:
        print(cataname + appname + "不存在")
        return

    MongoUtil.remove(cataname, {"appid": app["_id"]})
    print("已从“"+cataname+"”数据库中删除“"+appname+"”应用的分词信息")