/
service.py
240 lines (209 loc) · 8.76 KB
/
service.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
# -*- coding: UTF-8 -*-
import datetime, os, time, json, numpy
import utils
import urllib2
from optparse import OptionParser
import extract_tags
import draw_graphic
# Search keywords for the three tracked problem categories (Chinese UI terms
# sent verbatim to the feedback API): stuck/lag, crash, danmu (bullet comments).
KEYWORD_STUCK = '卡'
KEYWORD_CRASH = '闪退'
KEYWORD_DANMU = '弹幕'
# Latest released app version whose feedback gets its own report section.
# NOTE(review): stored as a float (3.630) — how trailing zeros are treated
# depends on utils.output_new_version_table's comparison; confirm before changing.
NEW_VERSION = 3.630
# Column labels, in chart order, fed to draw_graphic.out_put_graphic:
# stuck / danmu / crash / rest / total / time axis.
keyword_label_array = [draw_graphic.STUCK_NAME,
draw_graphic.DANMU_NAME,
draw_graphic.CRASH_NAME,
draw_graphic.REST_NAME,
draw_graphic.TOTAL_NAME,
draw_graphic.TIME_NAME]
# 获取所有评论信息
def get_info():
page = 1
max_page = 100
f = open(utils.get_file_path(date=utils.current_date()), 'w')
dic = utils.get_output_total_info_dic()
while page <= max_page:
print str(page)
url = "http://yuqing.dz11.com/Home/Nav/getUserFeedbackList?channel=ios&startTime=" + start_time + "%2000%3A00%3A00&endTime=" + end_time + "%2023%3A59%3A59&pageNum=" \
+ str(page) + "&pageSize=20"
print "当前请求URL: " + url
try:
request = urllib2.Request(url)
response = urllib2.urlopen(request)
result = json.loads(response.read().decode('utf-8'))
max_page = int(result['data']['total']) / 20 + 1
page += 1
for record in result['data']['records']:
utils.handle_total_info_data(dic, record)
f.write(record['title'].encode('utf-8') + ' ')
f.write(record['content'].encode('utf-8') + '\n')
except urllib2.URLError, e:
if hasattr(e, "code"):
print e.code
if hasattr(e, "reason"):
print e.reason
utils.output_total_info(dic)
f.close()
# 获取关键词条目数
def get_key_words_count(key_words):
url = "http://yuqing.dz11.com/Home/Nav/getUserFeedbackList?channel=ios&keywords=" + key_words + "&startTime=" + start_time + "%2000%3A00%3A00&endTime=" + end_time + "%2023%3A59%3A59&pageNum=1&pageSize=20"
print "当前请求URL: " + url
try:
request = urllib2.Request(url)
response = urllib2.urlopen(request)
result = json.loads(response.read().decode('utf-8'))
max_page = result['data']['total']
print key_words + ': ' + utils.convert_to_utf8(max_page)
return utils.convert_to_utf8(max_page)
except urllib2.URLError, e:
if hasattr(e, "code"):
print e.code
if hasattr(e, "reason"):
print e.reason
# 获取关键词对应的内容
def get_key_words_content(key_words):
page = 1
max_page = 100
max_length_content = ''
title = ''
device = ''
version = ''
max_length_id = ''
while page <= max_page:
url = "http://yuqing.dz11.com/Home/Nav/getUserFeedbackList?channel=ios&keywords=" + key_words + "&startTime=" + start_time + "%2000%3A00%3A00&endTime=" + end_time + "%2023%3A59%3A59&pageNum=1&pageSize=20"
result = send_request(url)
max_page = int(result['data']['total']) / 20 + 1
page += 1
for record in result['data']['records']:
content = utils.convert_to_utf8(record['content'])
id = int(utils.convert_to_utf8(record['id']))
if len(content) > len(max_length_content) and id not in problemIDs:
max_length_content = content
title = utils.convert_to_utf8(record['title'])
device = utils.convert_to_utf8(record['ua'])
version = utils.convert_to_utf8(record['version'])
max_length_id = id
problemIDs.append(max_length_id)
print title, max_length_content, device, version
return title, max_length_content, device, version, max_length_id
def send_request(url):
print "当前请求URL: " + url
try:
request = urllib2.Request(url)
response = urllib2.urlopen(request)
result = json.loads(response.read().decode('utf-8'))
return result
except urllib2.URLError, e:
if hasattr(e, "code"):
print e.code
if hasattr(e, "reason"):
print e.reason
# Build the recommendation table for all extracted keywords.
def get_all_key_word_content(keyword_count, additon_key_word=''):
    """Extract `keyword_count` keywords with jieba and collect one
    representative feedback entry per keyword.

    `additon_key_word`, when non-empty, is forced to the front of the list.
    Returns (html_table, comma_joined_keywords_utf8).
    """
    # Jieba-based keyword extraction over today's dump file.
    tags = extract_tags.get_topK_words(utils.get_file_path(date=utils.current_date()), keyword_count)
    if len(additon_key_word) > 0:
        tags.insert(0, additon_key_word)
    keywords = ",".join(tags).encode('utf-8')
    all_titles = []
    all_contents = []
    all_devices = []
    all_versions = []
    all_ids = []
    # One representative record per keyword; keywords with no usable content
    # are silently dropped.
    for tag in tags:
        title, content, dev, ver, index_id = get_key_words_content(utils.convert_to_utf8(tag))
        if content is None or len(content) == 0:
            continue
        all_titles.append(title)
        all_contents.append(content)
        all_devices.append(dev)
        all_versions.append(ver)
        all_ids.append(index_id)
    header = [utils.TITLE_NAME, utils.CONTENT_NAME, utils.DEVICE_NAME, utils.VERSION_NAME]
    key_word_table = utils.output_keyword_table(
        dict(zip(header, [all_titles, all_contents, all_devices, all_versions])),
        header, all_ids)
    return key_word_table, keywords
# Set the query window (module globals start_time / end_time) to a single day.
def get_start_time(date=None):
    """Set the module-global query window to the single day `date`.

    `date` is a datetime.date (defaults to today).  Both `start_time` and
    `end_time` are set to the same "%Y-%m-%d" string, so the window is always
    exactly one day; the old `start_time > end_time` sanity check could never
    fire (both sides were identical) and has been removed as dead code.
    """
    global start_time
    global end_time
    if date is None:
        date = datetime.date.today()
    start_time = end_time = date.strftime("%Y-%m-%d")
# File names for the all-feedback Excel attachment.
def totoal_info_excel_file_name():
    """Return (full_path, bare_file_name) of today's .xlsx feedback dump."""
    suffix = '.xlsx'
    today = utils.current_date()
    return utils.get_file_path(suffix, today), utils.get_file_prefix_name(suffix, today)
# Output keyword statistics charts for every day in an inclusive date range.
def out_put_all_statistic(begin, end):
    """Collect per-day stuck/danmu/crash/rest/total counts over [begin, end]
    (inclusive, datetime.date endpoints) and chart them via draw_graphic."""
    # Six parallel columns in keyword_label_array order:
    # stuck, danmu, crash, rest, total, day label.
    series = [[], [], [], [], [], []]
    day_count = (end - begin).days + 1
    for offset in range(day_count):
        day = begin + datetime.timedelta(days=offset)
        get_start_time(day)
        stuck = int(get_key_words_count(KEYWORD_STUCK))
        danmu = int(get_key_words_count(KEYWORD_DANMU))
        crash = int(get_key_words_count(KEYWORD_CRASH))
        total = int(get_key_words_count(''))
        rest = total - stuck - danmu - crash
        day_values = [stuck, danmu, crash, rest, total, day.strftime('%m-%d')]
        for column, value in zip(series, day_values):
            column.append(value)
    draw_graphic.out_put_graphic(dict(zip(keyword_label_array, series)), keyword_label_array)
# Output the keyword statistics chart for a single day.
def out_put_today_statistic(today):
    """Chart today's stuck/danmu/crash/rest/total counts and return the raw
    counts as (stuck, danmu, crash, total).

    `today` is a datetime.date used only for the chart's x-axis label.
    Relies on get_start_time() having already set the query window.
    """
    stuck = int(get_key_words_count(KEYWORD_STUCK))
    danmu = int(get_key_words_count(KEYWORD_DANMU))
    crash = int(get_key_words_count(KEYWORD_CRASH))
    total = int(get_key_words_count(''))
    # BUGFIX: this local was named `time`, shadowing the imported `time`
    # module within the function body.
    day_label = today.strftime('%m-%d')
    rest = total - stuck - danmu - crash
    draw_graphic.out_put_graphic(
        dict(zip(keyword_label_array, [[stuck], [danmu], [crash], [rest], [total], [day_label]])),
        keyword_label_array)
    return stuck, danmu, crash, total
def add_new_version_problems_into_problems_list(new_version_problems):
    """Reset the module-global `problemIDs` to a copy of
    `new_version_problems`, seeding the de-duplication list so new-version
    feedback ids are never recommended again."""
    global problemIDs
    problemIDs = list(new_version_problems)
# Run the full daily statistics pipeline and build the report email.
def make_statistic(topK, is_test=False):
    """Produce the daily iOS feedback report.

    `topK` is the number of jieba keywords to extract; `is_test` is accepted
    for caller compatibility but unused here.  Returns (subject, html_body)
    for the report email.
    """
    # Query window for all subsequent API calls.
    get_start_time(utils.current_date())
    # Dump every record: feeds both the xlsx attachment and keyword extraction.
    get_info()
    # Per-category counts plus the day's chart.
    stuck, danmu, crash, total = out_put_today_statistic(utils.current_date())
    # Feedback filed against the newest app version.
    new_version_html, new_version_problems = utils.output_new_version_table(NEW_VERSION)
    # Seed the de-duplication list so those records are not recommended again.
    add_new_version_problems_into_problems_list(new_version_problems)
    # Force the crash keyword into the list when any crash feedback exists.
    extra_keyword = u'闪退' if crash > 0 else ''
    # Representative content for every extracted keyword.
    key_word_table, key_words = get_all_key_word_content(topK, extra_keyword)
    subject = 'iOS组-舆情平台日报'
    body = utils.make_email_html(subject + utils.current_date().strftime(" %Y.%m.%d"),
                                 key_words, key_word_table, str(total), str(crash),
                                 str(stuck), str(danmu), new_version_table=new_version_html)
    # Prepare tomorrow's extra-summary file.
    utils.create_next_day_additions_file()
    return subject, body
# Ad-hoc entry point: set today's query window, then run keyword extraction
# with the top 8 keywords (console output only; no email is built here).
if __name__ == '__main__':
    get_start_time(utils.current_date())
    get_all_key_word_content(8)