def get_snlp_index():
    """Compute a SnowNLP sentiment score for every post in the spider table.

    Reads all rows of the table named by ``get_sql_name_key()`` from
    ``weibo-spider-scrapy/weibo.sqlite3``. Each post's text is split into
    sentences by SnowNLP, every sentence is scored separately, and the
    post's ``value`` is the mean of those sentence scores.

    Returns:
        A list with one dict per row, with the keys ``blog_id``,
        ``user_id``, ``user_name``, ``content``, ``reposts_count``,
        ``comments_count``, ``attitudes_count`` and ``value``.
    """
    from get_sql_name import get_sql_name_key

    # Score used when a post has no text / no sentences to analyse, so that
    # an empty post cannot trigger a division by zero.
    neutral_score = 0.5

    table = get_sql_name_key()
    conn = sqlite3.connect('weibo-spider-scrapy/weibo.sqlite3')
    try:
        cursor = conn.cursor()
        cursor.execute(
            "select blog_id,usr_id,user_name,content,reposts_count,"
            "comments_count,attitudes_count from %s" % (table))
        rows = cursor.fetchall()
    finally:
        # Release the database handle even if the query fails.
        conn.close()

    results = []
    for (blog_id, usr_id, user_name, content,
         reposts_count, comments_count, attitudes_count) in rows:
        # The sentiment is taken from the post text (``content``), not from
        # the author's name.
        sentences = SnowNLP(content).sentences if content else []
        scores = [SnowNLP(sentence).sentiments for sentence in sentences]
        value = sum(scores) / len(scores) if scores else neutral_score

        results.append({
            "blog_id": str(blog_id),
            "user_id": str(usr_id),
            "user_name": user_name,
            "content": content,
            "reposts_count": reposts_count,
            "comments_count": comments_count,
            "attitudes_count": attitudes_count,
            "value": value,
        })
    return results
def snlp_json():
    """Count the stored sentiment scores per rating bucket.

    Reads the ``value`` column of the table named by ``get_sql_name_key()``
    from ``weibo-snlp.sqlite3`` and sorts each score into one of five
    buckets:

    * ``极差``: 0   <= value < 0.2
    * ``差``:   0.2 <= value < 0.4
    * ``一般``: 0.4 <= value < 0.6
    * ``好``:   0.6 <= value < 0.8
    * ``极好``: 0.8 <= value <= 1

    Scores outside ``[0, 1]`` are not counted in any bucket but still
    contribute to ``total``.

    Returns:
        A 6-tuple ``(极好, 好, 一般, 差, 极差, total)`` where the first five
        items are the bucket counts and ``total`` is the number of rows read.
    """
    from get_sql_name import get_sql_name_key

    table = get_sql_name_key()
    conn = sqlite3.connect('weibo-snlp.sqlite3')
    try:
        cursor = conn.cursor()
        # Only the score is needed for the bucket counts.
        cursor.execute('select value from %s' % (table))
        rows = cursor.fetchall()
    finally:
        # Release the database handle even if the query fails.
        conn.close()

    counts = {"极好": 0, "好": 0, "一般": 0, "差": 0, "极差": 0}
    for (raw_value,) in rows:
        value = float(raw_value)
        if 0 <= value < 0.2:
            counts["极差"] += 1
        elif 0.2 <= value < 0.4:
            counts["差"] += 1
        elif 0.4 <= value < 0.6:
            counts["一般"] += 1
        elif 0.6 <= value < 0.8:
            counts["好"] += 1
        elif 0.8 <= value <= 1:
            counts["极好"] += 1

    return (counts["极好"], counts["好"], counts["一般"],
            counts["差"], counts["极差"], len(rows))
def snlp_main():
    """Poll the clock forever and run the sentiment job at minute 0.

    Once every 60 seconds the local-time minute is checked. When it is 0,
    ``get_snlp_index()`` is evaluated and its result is passed to
    ``insert_item`` together with the current table name from
    ``get_sql_name_key()``. This function never returns.
    """
    from get_sql_name import get_sql_name_key

    while True:
        # Compare integer to integer; comparing the minute with the string
        # "0" would never match and the job would never run.
        if time.localtime().tm_min == 0:
            array = get_snlp_index()
            sqlname = get_sql_name_key()
            insert_item(array, sqlname)
        time.sleep(60)
def key_word_main():
    """Poll the clock forever and run the keyword job at minute 0.

    Once every 60 seconds the local-time minute is checked. When it is 0,
    the current table name is fetched with ``get_sql_name_key()``,
    ``keyword`` is called with it, and the result is passed to
    ``insert_new_sql``. This function never returns.
    """
    from get_sql_name import get_sql_name_key

    while True:
        # Compare integer to integer; comparing the minute with the string
        # "0" would never match and the job would never run.
        if time.localtime().tm_min == 0:
            sql_name = get_sql_name_key()
            array = keyword(sql_name)
            insert_new_sql(sql_name, array)
        time.sleep(60)
def get_keyword_json():
    """Return the stored keywords of the current table as a JSON string.

    Reads the ``word`` and ``value`` columns of the table named by
    ``get_sql_name_key()`` from ``weibo-keywords.sqlite3``.

    Returns:
        A JSON array (as ``str``) of objects with the keys ``word`` and
        ``weight``; ``weight`` carries the row's ``value`` column.
    """
    from get_sql_name import get_sql_name_key

    table = get_sql_name_key()
    conn = sqlite3.connect('weibo-keywords.sqlite3')
    try:
        cursor = conn.cursor()
        cursor.execute('select word,value from %s' % (table))
        rows = cursor.fetchall()
    finally:
        # Release the database handle even if the query fails.
        conn.close()

    entries = [{"word": word, "weight": weight} for word, weight in rows]
    return json.dumps(entries)
def get_focus_things():
    """Collect distinct key phrases from the most-interacted-with posts.

    Reads every post of the table named by ``get_sql_name_key()`` from
    ``weibo-spider-scrapy/weibo.sqlite3``. Posts whose combined reposts,
    comments and attitudes count is at least 100 are analysed with
    ``TextRank4Keyword`` and their key phrases are gathered.

    Returns:
        A list of key phrases without duplicates, in the order in which
        they were first encountered.
    """
    from get_sql_name import get_sql_name_key

    tablename = get_sql_name_key()
    conn = sqlite3.connect('weibo-spider-scrapy/weibo.sqlite3')
    try:
        curs = conn.cursor()
        curs.execute(
            "select blog_id,content,reposts_count,comments_count,attitudes_count from %s" % (tablename))
        rows = curs.fetchall()
        curs.close()
    finally:
        # Close the connection too, not only the cursor.
        conn.close()

    phrases = []
    seen = set()  # constant-time duplicate check alongside the ordered list
    for _blog_id, content, reposts, comments, attitudes in rows:
        interactions = int(reposts) + int(comments) + int(attitudes)
        if interactions < 100:
            continue

        tr4w = TextRank4Keyword()
        tr4w.analyze(text=content, lower=True, window=2)
        for phrase in tr4w.get_keyphrases(keywords_num=20, min_occur_num=3):
            # Hard-coded phrase that is excluded from the result.
            if str(phrase) == "生殖器挂":
                continue
            if phrase not in seen:
                seen.add(phrase)
                phrases.append(phrase)
    return phrases