async def getCrawledNews(self):
    # Number of crawled news items
    # Number of crawled news items that have related news
    date = []
    crawledNews = []
    crawledNewsWithRelative = []
    end_time = arrow.get(
        str(self.date) + ' 17:00:00+00:00', 'YYYYMMDD HH:mm:ss')
    start_time = end_time.replace(days=-1)
    sql = '''
        SELECT count(*) AS crawledNews FROM News
        WHERE [CreatedTime]>='{start}' AND [CreatedTime]<'{end}';
    '''.format(
        start=start_time,
        end=end_time
    )
    res = mssql.query(sql, LOCAL)
    for item in res:
        crawledNews.append(int(item['crawledNews']))
    sql = '''
        SELECT count(distinct(RelativeNews.LeftNewsId)) AS crawledNewsWithRelative
        FROM RelativeNews
        JOIN News ON RelativeNews.LeftNewsId=News.NewsId
        WHERE News.CreatedTime>='{start}' AND News.CreatedTime<'{end}'
    '''.format(
        start=start_time,
        end=end_time)
    res = mssql.query(sql, LOCAL)
    for item in res:
        date.append(self.date)
        crawledNewsWithRelative.append(
            int(item['crawledNewsWithRelative']))
    await self.insert(
        date=date,
        table='crawledNews',
        crawledNews=crawledNews,
        crawledNewsWithRelative=crawledNewsWithRelative
    )
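
# A minimal standalone sketch of the 24-hour reporting window used above,
# assuming self.date is a 'YYYYMMDD' string and a recent arrow release
# (>= 0.15), where shift(days=-1) replaces the older replace(days=-1) form.
# The timezone suffix is omitted here because arrow defaults to UTC when
# parsing with an explicit format. Illustration only, not part of the handler.
import arrow

def reporting_window(date_str):
    """Return the (start, end) pair for a 24h window ending at 17:00 UTC."""
    end_time = arrow.get(str(date_str) + ' 17:00:00', 'YYYYMMDD HH:mm:ss')
    start_time = end_time.shift(days=-1)
    return start_time, end_time

# Example: reporting_window('20240101')
# -> (2023-12-31T17:00:00+00:00, 2024-01-01T17:00:00+00:00)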
async def getNewsByTitle(self, title):
    """
    Search news items, matching the title with SQL LIKE.

    :param title: keyword to match against news titles
    :return: list of matching rows
    """
    ret = []
    sql = '''
        SELECT TOP 100 News.*, Tags.Name FROM {table}
        LEFT JOIN NewsTags
            ON News.NewsId = NewsTags.NewsId AND NewsTags.Score = 1
        LEFT JOIN Tags ON NewsTags.TagId = Tags.Id
        WHERE UPPER(title) LIKE '%{title}%'
        ORDER BY CreatedTime DESC
    '''.format(table='News', title=title.upper())
    res = query(sql, self.LOCAL)
    for item in res:
        ret.append(item)
    return ret
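
# A hedged sketch of the same title search as a parameterized query, which
# avoids interpolating user input into the SQL string. The real signature of
# the query()/mssql.query() helpers is not shown in this file, so this sketch
# talks to the driver directly and assumes pymssql; server/user/password are
# placeholders.
import pymssql

def search_news_by_title(title, server='localhost', user='sa',
                         password='<password>', database='NewsDb'):
    sql = '''
        SELECT TOP 100 News.*, Tags.Name FROM News
        LEFT JOIN NewsTags
            ON News.NewsId = NewsTags.NewsId AND NewsTags.Score = 1
        LEFT JOIN Tags ON NewsTags.TagId = Tags.Id
        WHERE UPPER(title) LIKE %s
        ORDER BY CreatedTime DESC
    '''
    conn = pymssql.connect(server, user, password, database)
    try:
        cursor = conn.cursor(as_dict=True)
        # The wildcard pattern is passed as a bound parameter instead of
        # being formatted into the SQL text.
        cursor.execute(sql, ('%' + title.upper() + '%',))
        return cursor.fetchall()
    finally:
        conn.close()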
async def getCountNewsByType(self):
    # Type of crawled news items
    # Number of crawled news items per type
    date = []
    newsType = []
    newsCount = []
    end_time = arrow.get(
        str(self.date) + ' 17:00:00+00:00', 'YYYYMMDD HH:mm:ss')
    start_time = end_time.replace(days=-1)
    sql = '''
        SELECT count(*) AS Count, [Type] FROM [News]
        WHERE [CreatedTime]>='{start}' AND [CreatedTime]<'{end}'
        GROUP BY [Type]
    '''.format(
        start=start_time,
        end=end_time)
    res = mssql.query(sql, LOCAL)
    for item in res:
        date.append(self.date)
        newsType.append(int(item['Type']))
        newsCount.append(int(item['Count']))
    if len(date) == 0:
        date.append(self.date)
        newsType.append(0)
        newsCount.append(0)
    await self.insert(
        date=date,
        table='countNewsByType',
        newsType=newsType,
        newsCount=newsCount
    )
async def getCommentUser(self):
    # Number of commenting users
    # Comments per user (for the commenting-user ratio)
    date = []
    commentUser = []
    totalComments = []
    commentsDict = dict()
    end_time = arrow.get(
        str(self.date) + ' 17:00:00+00:00', 'YYYYMMDD HH:mm:ss')
    start_time = end_time.replace(days=-1)
    sql = '''
        SELECT [UserId], count(*) AS Count FROM Comments
        WHERE [Timestamp]>='{start}' AND [Timestamp]<'{end}'
        GROUP BY [UserId]
    '''.format(
        start=start_time,
        end=end_time
    )
    res = mssql.query(sql, LOCAL)
    for item in res:
        commentsDict.update({item['UserId']: item['Count']})
    date.append(self.date)
    commentUser.append(len(commentsDict))
    totalComments.append(sum(commentsDict.values()))
    await self.insert(
        date=date,
        table='commentUser',
        commentUser=commentUser,
        totalComments=totalComments
    )
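
# A hedged alternative to the per-user dictionary above: the distinct-user
# count and the total comment count can also be computed in a single
# aggregate query. Sketch only; it assumes the same Comments schema and the
# existing mssql.query(sql, LOCAL) helper used elsewhere in this file.
COMMENT_USER_SUMMARY_SQL = '''
    SELECT count(distinct [UserId]) AS commentUser,
           count(*) AS totalComments
    FROM Comments
    WHERE [Timestamp]>='{start}' AND [Timestamp]<'{end}'
'''
# Possible usage inside getCommentUser:
#   res = mssql.query(
#       COMMENT_USER_SUMMARY_SQL.format(start=start_time, end=end_time), LOCAL)
#   commentUser.append(int(res[0]['commentUser']))
#   totalComments.append(int(res[0]['totalComments']))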
async def getFetchReadByCategory(self):
    # News read count
    # News impression (fetch) count
    # News category
    date = []
    readCount = []
    fetchCount = []
    categoryName = []
    categoryIdNameDict = {-1: 'Recommand'}
    sql = 'SELECT [CategoryId], [Name] FROM Categories'
    res = mssql.query(sql, LOCAL)
    for item in res:
        categoryIdNameDict.update({item['CategoryId']: item['Name']})
    content = self.readFile('fetch_read_by_category')
    for line in content:
        try:
            categoryId = int(line[2])
            if categoryId not in categoryIdNameDict.keys():
                continue
            date.append(self.date)
            readCount.append(int(line[0]))
            fetchCount.append(int(line[1]))
            categoryName.append(categoryIdNameDict[categoryId])
        except ValueError:
            pass
    if len(date) == 0:
        date.append(self.date)
        readCount.append(0)
        fetchCount.append(0)
        categoryName.append('')
    await self.insert(
        date=date,
        table='fetchReadByCategory',
        readCount=readCount,
        fetchCount=fetchCount,
        categoryName=categoryName
    )
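
# A small sketch of the row layout the loop above expects from
# self.readFile('fetch_read_by_category'): each row is indexable as
# (readCount, fetchCount, categoryId). The actual reader is not shown in this
# section, so this stand-in splits whitespace-separated lines; the field
# order and delimiter are assumptions.
def parse_fetch_read_rows(raw_lines):
    rows = []
    for raw in raw_lines:
        parts = raw.split()
        if len(parts) < 3:
            continue
        try:
            # (readCount, fetchCount, categoryId)
            rows.append((int(parts[0]), int(parts[1]), int(parts[2])))
        except ValueError:
            # Skip header or malformed lines, mirroring the try/except above.
            continue
    return rows

# Example: parse_fetch_read_rows(['120 3400 7', 'bad line'])
# -> [(120, 3400, 7)]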
async def getQueryData(self, parameters):
    start_date = parameters.get('start_date')
    end_date = parameters.get('end_date')
    click_index = parameters.get('click_index')
    tag = parameters.get('tag')
    requestcategoryid = parameters.get('requestcategoryid')
    newstype = parameters.get('newstype')
    mediaid = parameters.get('mediaid')
    print(click_index, tag, requestcategoryid, newstype, mediaid)
    categoryid = parameters.get('categoryid')
    gb = parameters.get('gb')
    with_relative = parameters.get('with_relative')
    # Map media ids to display names.
    medias = dict()
    sql = 'SELECT * FROM Media'
    res = mssql.query(sql, MELOCAL)
    for dt in res:
        medias[str(dt['Id'])] = dt['Name']
    # Group full timestamps by day.
    for i, column in enumerate(gb):
        if column == 'date':
            gb[i] = 'left(date, 8)'
    if start_date == '':
        self.finish(json.dumps([]))
    else:
        sfrom = 'FROM {}.{}_dashboard_data'.format(DATABASE, self.LOCAL)
        select = ('CAST(SUM(presents) AS UNSIGNED) AS present,'
                  'CAST(SUM(clicked) AS UNSIGNED) AS click ')
        where = 'WHERE date<=%s and date>=%s' % (end_date, start_date)
        for index, item in enumerate(gb):
            if item == 'clickindex':
                gb[index] = 'click_index'
        gb = ','.join(gb)
        groupby = '%s ' % gb
        # Optionally restrict to rows with (or without) related-news tags.
        if len(with_relative) > 0 and with_relative[0] == '1':
            where += ' and tag like "RelativeNews:%"'
        elif len(with_relative) > 0 and with_relative[0] == '-1':
            where += ' and tag not like "RelativeNews:%"'
        # Add an IN (...) clause for every non-empty filter parameter;
        # eval(vari) reads the local variable of the same name.
        for vari in ['click_index', 'tag', 'requestcategoryid',
                     'newstype', 'mediaid', 'categoryid']:
            if (eval(vari) is not None and len(eval(vari)) != 0
                    and eval(vari)[0] != ''):
                where += ' and %s in (%s)' % (vari, ','.join(
                    ['"%s"' % i for i in eval(vari)[0].split(',')]))
        if gb != '':
            select += ', %s' % gb
        if groupby.strip() != '':
            groupby = 'GROUP BY ' + groupby
        sql = 'SELECT %s %s %s %s' % (select, sfrom, where, groupby)
        data = DB(**DBCONFIG).query(sql)
        res = []
        # Attach human-readable category names when grouping by category.
        if len(data) > 0 and data[0].get('categoryid') is not None:
            sql = '''
                SELECT CategoryId, Name FROM Categories
            '''
            category_id_name = mssql.query(sql, self.LOCAL)
            category_id_name_dict = {
                item['CategoryId']: item['Name']
                for item in category_id_name
            }
            for item in data:
                item.update({
                    'CategoryName':
                    category_id_name_dict.get(int(item.get('categoryid')))
                })
        for dts in data:
            if 'mediaid' in gb:
                mid = dts['mediaid']
                dts['mediaid'] = '%s-%s' % (
                    mid, medias.get(str(mid), 'unknown'))
            # Click-through rate, formatted as a percentage.
            if dts['present'] == 0:
                dts['ctr'] = 0
            else:
                dts['ctr'] = '{0:%}'.format(dts['click'] / dts['present'])
            # Guarantee every expected column exists in the response row.
            for key in ['date', 'left(date, 8)', 'right(date, 2)',
                        'click_index', 'tag', 'requestcategoryid',
                        'newstype', 'mediaid', 'categoryid']:
                dts.setdefault(key, '0')
            res.append(dts)
        return res
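
# A hedged usage sketch for getQueryData. The shapes below are inferred from
# how the method reads its inputs: 'gb' and 'with_relative' are lists, and
# each filter is a list whose first element is a comma-separated string
# (matching the eval(vari)[0].split(',') access above). The values are
# illustrative only, and 'handler' is a placeholder for an instance of the
# surrounding handler class.
async def demo(handler):
    parameters = {
        'start_date': '20240101',
        'end_date': '20240107',
        'click_index': [''],          # empty string -> filter skipped
        'tag': [''],
        'requestcategoryid': [''],
        'newstype': [''],
        'mediaid': ['12,34'],         # becomes: mediaid in ("12","34")
        'categoryid': [''],
        'gb': ['date', 'mediaid'],    # grouped by day and by media
        'with_relative': ['0'],       # neither '1' nor '-1': no tag filter
    }
    rows = await handler.getQueryData(parameters)
    for row in rows:
        print(row.get('mediaid'), row.get('ctr'))

# To run:  import asyncio; asyncio.run(demo(handler))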