def corpus():
    """Push every user's corpus rows from ``v_corpus`` into Elasticsearch.

    For each distinct ``user_id`` in ``online_bussiness_corpusbatch`` the
    user's ``mall_id`` is read from ``users_userprofile`` and the user's
    ``v_corpus`` rows are bulk-indexed (index ``corpus_es``, doc type
    ``corpus``) in fixed-size batches, with ``corpusobject_id`` used as the
    document ``_id``.  The JSON text columns ``question_list`` and
    ``answer_list`` are decoded; ``answer_list`` is replaced by its
    ``text_anwser`` and ``praisegoods_answer`` members.
    """
    batch_size = 20  # number of documents sent per bulk request

    cursor.execute(
        "select DISTINCT(user_id) as user_id from online_bussiness_corpusbatch")
    users = cursor.fetchall()
    for user in users:
        user_id = user["user_id"]
        cursor.execute("select mall_id from users_userprofile where id = %s",
                       [user_id])
        mallinfo = cursor.fetchone() or {}
        mall_id = mallinfo.get("mall_id")

        cursor.execute("SELECT * FROM v_corpus where user_id =%s ", [user_id])
        data = cursor.fetchall()

        # A stepped range yields the same slices as the former
        # ceil()-based batch counter without relying on random._ceil/xrange.
        for offset in range(0, len(data), batch_size):
            action_lis = data[offset:offset + batch_size]
            for doc in action_lis:
                if mall_id:
                    doc["mall_id"] = mall_id
                doc["_id"] = doc["corpusobject_id"]
                doc["question_list"] = json.loads(doc["question_list"])
                # Decode the answer payload once and pick both members.
                answers = json.loads(doc["answer_list"])
                doc["text_anwser"] = answers['text_anwser']
                doc["praisegoods_answer"] = answers['praisegoods_answer']
                del doc["answer_list"]
            ok, err = helpers.bulk(client,
                                   actions=action_lis,
                                   index=corpus_es,
                                   doc_type="corpus")
            print(ok, err)
def mall_analysis(cycle='天',
                  at_id=None,
                  mall_id=None,
                  start_date=None,
                  end_date=None):
    """Collect article operation records for a mall and hand them on.

    Args:
        cycle: statistics period label; one of the keys of the two lookup
            tables below.
        at_id: optional material-type ids; used with ``at_id in %s`` so a
            sequence is expected.  (The original note says official-account
            article material, at_id=1, is excluded by default - that
            exclusion is not visible in this function.)
        mall_id: optional mall id restricting the stores considered.
        start_date: optional exclusive lower bound on ``operation_time``.
        end_date: optional exclusive upper bound on ``operation_time``.

    Returns nothing.  When matching rows exist they are passed to
    ``analysis_one_set`` together with the mall id, the clickable-material
    count and the numeric statistics type.
    """
    statistics_dic = {'总计': 1, '年': 2, '月': 3, '周': 4, '天': 5}
    date_format_dic = {'年': '%Y', '月': '%Y-%m', '周': '%x-%v', '天': '%Y-%m-%d'}
    fromat_style = date_format_dic.get(cycle)
    statistics_type = statistics_dic.get(cycle)

    sql1 = 'select id,chs_name,eng_name,mall_id from third_part_wechat_store '
    if mall_id:
        cursor.execute(sql1 + 'where mall_id = %s ', [mall_id])
    else:
        cursor.execute(sql1)
    store_ids = [row['id'] for row in cursor.fetchall()]
    if not store_ids:
        # Nothing to analyse; also avoids an invalid "store_id in ()" query.
        return

    # Number of clickable materials (at_id=2 or a non-empty url).
    sql = "SELECT count(*) as counts FROM django_aip.third_part_wechat_articleinformation where (at_id=2 or url<>'') and store_id in %s "
    cursor.execute(sql, [store_ids])
    article_count = cursor.fetchone().get('counts')

    _sql2 = ['store_id in %s']
    params2 = [store_ids]
    if at_id:
        _sql2.append('at_id in %s')
        params2.append(at_id)
    sql2 = 'select id from third_part_wechat_articleinformation '
    sql2 += 'where ' + ' and '.join(_sql2)
    cursor.execute(sql2, params2)
    article_ids = [row['id'] for row in cursor.fetchall()]
    if not article_ids:
        # An empty list would render as "article_id in ()", a SQL error.
        return

    sql3 = "select operation_type,date_format(operation_time,%s) as cycle," \
           "chat_record_id from third_part_wechat_chatrecordarticle " \
           "where article_id in %s "
    param3 = [fromat_style, article_ids]
    if start_date:
        sql3 += 'and operation_time > %s '
        param3.append(start_date)
    if end_date:
        sql3 += 'and operation_time < %s '
        param3.append(end_date)
    sql3 += 'order by operation_time '
    cursor.execute(sql3, param3)
    res3 = cursor.fetchall()
    if res3:
        analysis_one_set(res3, mall_id, article_count, statistics_type)
def main():
    """Run ``tag_analysis`` for every mall and every statistics cycle.

    The latest ``add_time`` per ``statistics_type`` in
    ``data_analysis_keywordcumulate`` is the start of the next range (default
    ``'2017-01-01'``); the end is the start of the current year / month /
    week / day depending on the cycle.  A range is processed only when
    ``isFullCycle`` accepts it.
    """
    print('标签分析 data_analysis_keywordcumulate')
    sql_all = 'SELECT statistics_type, max(add_time) as last_time FROM django_aip.data_analysis_keywordcumulate group by statistics_type'
    sql_mall = 'select id from django_aip.third_part_wechat_mall'
    cursor.execute(sql_all)
    # Copy into a list: rows are appended below and an empty result may be
    # an immutable tuple.
    res_alltime = list(cursor.fetchall())
    cursor.execute(sql_mall)
    malls = [row['id'] for row in cursor.fetchall()]

    statistics_dic = {2: '年', 3: '月', 4: '周', 5: '天'}

    # Cycles without any stored result still need a first run.
    seen_types = set(row['statistics_type'] for row in res_alltime)
    for missing_type in sorted(set(statistics_dic) - seen_types):
        res_alltime.append({'statistics_type': missing_type,
                            'last_time': None})

    for row in res_alltime:
        statistics_type = row['statistics_type']
        cycle = statistics_dic.get(statistics_type)
        start_date = row.get('last_time') or '2017-01-01'
        if statistics_type == 2:
            end_date = getFirstDayOfCurYear()
        elif statistics_type == 3:
            end_date = getFirstDayOfCurMonth()
        elif statistics_type == 4:
            end_date = getFirstDayOfCurWeek()
        elif statistics_type == 5:
            end_date = getCurDay()
        else:
            # Unsupported type stored in the table: there is no end date for
            # it, so skip instead of reusing a stale/unbound end_date.
            continue

        # The check does not depend on the mall, so evaluate it once.
        if not isFullCycle(start_date, end_date, cycle):
            continue
        for mall_id in malls:
            print('统计周期:%s 统计日期范围:%s--%s 统计商城id:%d ' %
                  (cycle, start_date, end_date, mall_id))
            tag_analysis(cycle=cycle,
                         at_id=None,
                         mall_id=mall_id,
                         start_date=start_date,
                         end_date=end_date)
def main():
    """Copy the offline outlets of the selected Youzan shop into MySQL.

    For each row of ``third_part_wechat_praisestore`` selected below, the
    ``youzan.multistore.offline`` search API is paged through and every
    outlet is written with ``replace into`` to
    ``third_part_wechat_praisestoreonline``.  Outlets whose ``lng`` or
    ``lat`` is the string ``"0"`` get coordinates from
    ``CloudQQMap().getPoint``.
    """
    cursor.execute("select * from django_aip.third_part_wechat_praisestore where sid in (15454921)")
    storelis = cursor.fetchall()
    offline_url = "https://open.youzan.com/api/oauthentry/youzan.multistore.offline/3.0.0/search"
    page_size = 100
    for store in storelis:
        access_token = store['access_token']
        sid = store['sid']
        print("-------------------__>",sid)

        # First request with a single item only to learn the total count.
        params = {"access_token":access_token,"page_size":1,"page_no":1}
        r = requests.get(offline_url,params=params)
        if r.status_code != 200:
            continue
        response = r.json().get("response")
        if not response:
            continue
        offline_count = response['count']
        page_max = offline_count // page_size
        if offline_count % page_size:
            page_max += 1

        for page_no in range(1, page_max + 1):
            params = {"access_token": access_token, "page_size": page_size, "page_no": page_no}
            r = requests.get(offline_url,params=params)
            if r.status_code != 200:
                print("get offline err")
                continue
            # Parse once; tolerate a payload without "response"/"list".
            offline_list = (r.json().get("response") or {}).get("list") or []
            for offline_info in offline_list:
                offline_id = offline_info["id"]
                kdt_id = sid
                name = offline_info["name"]
                province = offline_info["province"]
                city = offline_info["city"]
                area = offline_info["area"]
                address = offline_info["address"]
                county_id = offline_info["county_id"]
                lng = offline_info["lng"]
                lat = offline_info["lat"]
                if lng == "0" or lat == "0":
                    # NOTE(review): the two identical split("(") calls are
                    # kept as found; one may originally have used a
                    # full-width parenthesis - confirm.
                    detail_address = (province+city+address).split("(")[0].split("(")[0].encode()
                    print(detail_address)
                    Map = CloudQQMap()
                    lat,lng = Map.getPoint({"address":detail_address})
                    print(offline_id,lat,lng)
                goods_list = get_goods_list(access_token,offline_id)
                if goods_list:
                    goods_list = ",".join(goods_list)
                else:
                    goods_list = None
                sql_param = [offline_id,name,province,city,area,address,county_id,lng,
                             lat,goods_list,kdt_id]
                cursor.execute("replace into django_aip.third_part_wechat_praisestoreonline"
                               "(id,`name`,province,city,area,address,county_id,lng,lat,goods_list,"
                               "kdt_id)VALUE(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)",sql_param)
                conn.commit()
def reorganize(self):
    """Work out which goods to update, delete or hide for every store.

    For each store in ``self.stores`` the authorization is checked, the
    on-sale and in-stock goods lists are fetched, compared with the goods
    already stored for that shop, and the result is passed to
    ``self.handerData``.  The per-store sync time kept in ``conf`` is
    written back to ``sync_goods_conf.py`` at the end.
    """
    for store in self.stores:
        self.sid = store["sid"]
        conf_key = str(self.sid)
        update_time_start = conf.get(conf_key)
        # Taken before any goods are fetched so that changes made while the
        # sync is running fall into the next run's window.
        end = datetime.datetime.now().strftime("%Y-%m-%d %X")
        name = store["name"]
        token = store["access_token"]

        r_auth_json = requests.get(self.basic_auth + token).json()
        if r_auth_json.get("error_response"):
            print("%s店铺授权出错 可能解除授权" % (name))
            continue

        payload = {"access_token": token}
        print("--------店铺:%s (%s)-----" % (name, self.sid))
        onsale_count = self.goods_count(self.onsale, payload)
        outsale_count = self.goods_count(self.outsale, payload)
        print("在售:%d , 仓库:%d" % (onsale_count, outsale_count))
        onsale_lis = self.AllGoodsLis(self.onsale, onsale_count, payload)
        outsale_lis = self.AllGoodsLis(self.outsale, outsale_count, payload)

        cursor.execute(
            "select item_id from third_part_wechat_praisegoods "
            "where kdt_id=%s and is_display=1", [self.sid])
        exist_lis = [row["item_id"] for row in cursor.fetchall()]

        # Without a previous sync time every on-sale item is refreshed.
        if update_time_start:
            update_lis = [
                goods["item_id"] for goods in onsale_lis
                if goods["update_time"] > update_time_start
            ]
        else:
            update_lis = [goods["item_id"] for goods in onsale_lis]
        onsale_lis = [goods["item_id"] for goods in onsale_lis]
        outsale_lis = [goods["item_id"] for goods in outsale_lis]
        # Stored as displayed but no longer on sale nor in stock.
        delete_lis = list(
            set(exist_lis) - set(onsale_lis) - set(outsale_lis))
        hide_lis = outsale_lis
        print("更新:%d ,删除:%d, 隐藏:%d" %
              (len(update_lis), len(delete_lis), len(hide_lis)))
        self.handerData(update_lis, onsale_lis, delete_lis, token, self.sid)

        # Record the sync time only after the store was actually processed;
        # doing it before the authorization check made a skipped store lose
        # the updates of the skipped window.
        conf[conf_key] = end

    with open("sync_goods_conf.py", "w+") as f:
        f.write("conf=" + str(conf))
def django_aip_es_praise():
    """Bulk-index every row of ``v_praisegoods`` into Elasticsearch.

    Rows are sent to index ``praise_es`` (doc type ``goods``) in fixed-size
    batches with ``item_id`` as the document ``_id``; the result of each bulk
    call is printed.
    """
    batch_size = 20  # number of documents sent per bulk request

    cursor.execute("select * from v_praisegoods")
    res_all = cursor.fetchall()
    for row in res_all:
        row['_id'] = row['item_id']

    # A stepped range replaces the ceil()-based batch counter that relied on
    # the private random._ceil and the Python-2-only xrange.
    for offset in range(0, len(res_all), batch_size):
        action_lis = res_all[offset:offset + batch_size]
        ok = helpers.bulk(client,
                          actions=action_lis,
                          index=praise_es,
                          doc_type="goods")
        print(ok)
def dataupdate1(data_dic, table):
    """Accumulate one user's counters, inserting the row when it is new.

    If ``data_analysis_praiseuserstatistics`` already holds a row for the
    ``kdt_id`` / ``from_wechat_user_id`` pair of ``data_dic``, its time
    columns are overwritten and the four counters are incremented by the
    values in ``data_dic``; otherwise the record goes to
    ``dataimport(data_dic, table)``.
    """
    lookup_sql = "select * from data_analysis_praiseuserstatistics where kdt_id =%s and from_wechat_user_id=%s"
    cursor.execute(lookup_sql,
                   [data_dic['kdt_id'], data_dic['from_wechat_user_id']])
    if not cursor.fetchall():
        dataimport(data_dic, table)
        return

    update_sql = (
        'update data_analysis_praiseuserstatistics set add_time_desc=%s ,add_time=%s,'
        'wechat_msg_count=wechat_msg_count+%s,hf_msg_count=hf_msg_count+%s,'
        'wechat_click_count=wechat_click_count+%s,hf_click_count=hf_click_count+%s '
        'where from_wechat_user_id=%s and kdt_id=%s')
    # Placeholder order of the statement above.
    ordered_keys = ('add_time_desc', 'add_time', 'wechat_msg_count',
                    'hf_msg_count', 'wechat_click_count', 'hf_click_count',
                    'from_wechat_user_id', 'kdt_id')
    cursor.execute(update_sql, [data_dic[key] for key in ordered_keys])
    conn.commit()
def __init__(self, kdt_id=None):
    """Load the stores to synchronise and set up endpoints and work lists.

    Args:
        kdt_id: optional sequence of shop ids (``sid``); when empty or
            omitted every row of ``third_part_wechat_praisestore`` is
            loaded.  The default is ``None`` rather than a shared mutable
            list; both are treated the same.
    """
    if kdt_id:
        cursor.execute(
            "select sid,`name`,access_token from third_part_wechat_praisestore "
            "where sid in %s", [kdt_id])
    else:
        cursor.execute(
            "select sid,`name`,access_token from third_part_wechat_praisestore"
        )
    self.stores = cursor.fetchall()
    self.page_size = 100
    self.item = "https://open.youzan.com/api/oauthentry/youzan.item/3.0.0/get"
    self.onsale = "https://open.youzan.com/api/oauthentry/youzan.items.onsale/3.0.0/get"
    self.outsale = "https://open.youzan.com/api/oauthentry/youzan.items.inventory/3.0.0/get"
    self.delete_lis = []  # goods that need to be deleted
    self.update_lis = []  # goods that need to be updated
    self.hide_lis = []  # goods that need to be hidden
    self.onsale_lis = []  # goods currently on sale
    self.outsale_lis = []  # goods in the warehouse (not on sale)
    # The access token is appended directly to this URL by callers.
    self.basic_auth = "https://open.youzan.com/api/oauthentry/youzan.shop.basic/3.0.0/get" \
                      "?access_token="
def streammsg_analysis(cycle=None,
                       at_id=None,
                       mall_id=None,
                       start_date=None,
                       end_date=None,
                       wechat_account_id=None,
                       to_username=None):
    """Fetch the chat records sent to one account and pass them on.

    Args:
        cycle: statistics period label (key of the lookup tables below).
        at_id: material type; accepted but not used in this function.
        mall_id: mall id, forwarded unchanged.
        start_date: optional inclusive lower bound on ``add_time``.
        end_date: optional exclusive upper bound on ``add_time``.
        wechat_account_id: account id, forwarded unchanged.
        to_username: receiver name the chat records are filtered by.

    Non-empty results are handed to ``analysis_one_set``.
    """
    type_by_cycle = {'总计': 1, '年': 2, '月': 3, '周': 4, '天': 5}
    format_by_cycle = {
        '年': '%Y',
        '月': '%Y-%m',
        '周': '%x-%v',
        '天': '%Y-%m-%d'
    }
    statistics_type = type_by_cycle.get(cycle)

    clauses = [
        'select id,from_username,ask_type_id,reply_type_id,DATE_FORMAT(add_time,%s) as cycle from django_aip.third_part_wechat_chatrecord ',
        'where to_username=%s ',
    ]
    query_args = [format_by_cycle.get(cycle), to_username]
    if start_date:
        clauses.append('and add_time>=%s ')
        query_args.append(start_date)
    if end_date:
        clauses.append('and add_time<%s ')
        query_args.append(end_date)
    clauses.append('order by add_time ')

    cursor.execute(''.join(clauses), query_args)
    rows = cursor.fetchall()
    if not rows:
        return
    analysis_one_set(rows, mall_id, statistics_type, wechat_account_id,
                     to_username)
def store_analysis(cycle='天',
                   at_id=None,
                   mall_id=None,
                   start_date=None,
                   end_date=None):
    """Run the per-store article analysis for every (matching) store.

    Args:
        cycle: statistics period label (key of the lookup tables below).
        at_id: optional material-type ids, used with ``at_id in %s``.
        mall_id: optional mall id restricting the stores that are visited.
        start_date: optional exclusive lower bound on ``operation_time``.
        end_date: optional exclusive upper bound on ``operation_time``.

    For each store that has articles, the prepared operation-record query
    and its parameters are handed to ``analysis_one_set``.
    """
    type_by_cycle = {'总计': 1, '年': 2, '月': 3, '周': 4, '天': 5}
    format_by_cycle = {
        '年': '%Y',
        '月': '%Y-%m',
        '周': '%x-%v',
        '天': '%Y-%m-%d'
    }
    fromat_style = format_by_cycle.get(cycle)
    statistics_type = type_by_cycle.get(cycle)

    store_sql = 'select id,chs_name,eng_name,mall_id from third_part_wechat_store '
    if mall_id:
        store_sql += 'where mall_id = %s '
        store_sql = cursor.mogrify(store_sql, [mall_id])
    cursor.execute(store_sql)
    store_rows = cursor.fetchall()

    # The record query and its date bounds are identical for every store.
    record_sql = "select operation_type,date_format(operation_time,%s) as cycle," \
                 "chat_record_id from third_part_wechat_chatrecordarticle " \
                 "where article_id in %s "
    date_args = []
    if start_date:
        record_sql += 'and operation_time > %s '
        date_args.append(start_date)
    if end_date:
        record_sql += 'and operation_time < %s '
        date_args.append(end_date)

    for store_info in store_rows:
        store_id = store_info['id']
        store_mall_id = store_info['mall_id']
        # Prefer the Chinese name, fall back to the English one.
        store_name = store_info['chs_name'] or store_info['eng_name']

        conditions = []
        article_args = []
        if store_id:
            conditions.append('store_id = %s')
            article_args.append(store_id)
        if at_id:
            conditions.append('at_id in %s')
            article_args.append(at_id)
        article_sql = 'select id,store_id from third_part_wechat_articleinformation '
        if article_args:
            article_sql += 'where ' + ' and '.join(conditions)
            article_sql = cursor.mogrify(article_sql, article_args)
        cursor.execute(article_sql)
        articles = [row['id'] for row in cursor.fetchall()]
        if not articles:
            continue

        analysis_one_set(record_sql, articles, fromat_style, list(date_args),
                         store_name, statistics_type, store_mall_id, store_id)
def analysis_one_set(sql3, article_id, fromat_style, param3_end, store_name,
                     statistics_type, mall_id, store_id):
    """Aggregate one store's article operations per cycle and store them.

    Args:
        sql3: operation-record query expecting the date format, the article
            ids and then ``param3_end`` as parameters.
        article_id: article ids of the store.
        fromat_style: MySQL ``date_format`` pattern of the cycle.
        param3_end: extra query parameters (date bounds).
        store_name, statistics_type, mall_id, store_id: copied into every
            result row.

    For every cycle value found, the distinct chat records (asks), the
    distinct clicked chat records (``operation_type == 1``), the distinct
    users behind each, and the click rate are written through
    ``dataimport`` to ``data_analysis_storecumulate``.
    """
    params3 = [fromat_style, article_id] + param3_end
    try:
        cursor.execute(sql3, params3)
        res = cursor.fetchall()
    except Exception as e:
        print(sql3)
        print(params3)
        print(e)
        # Nothing to aggregate; previously execution continued into a
        # NameError on the unbound result.
        return
    if not res:
        return

    # Group once instead of rescanning all rows for every cycle.
    records_by_cycle = {}
    clicks_by_cycle = {}
    for row in res:
        cycle = row['cycle']
        records_by_cycle.setdefault(cycle, set()).add(row['chat_record_id'])
        if row['operation_type'] == 1:
            clicks_by_cycle.setdefault(cycle, set()).add(
                row['chat_record_id'])

    person_sql = 'select count(distinct(from_username)) as counts from third_part_wechat_chatrecord where id in %s'

    def count_persons(record_ids):
        # Zero or one record needs no query: the person count equals it.
        if len(record_ids) > 1:
            cursor.execute(person_sql, [record_ids])
            return cursor.fetchone().get('counts')
        return len(record_ids)

    # Chronological order: '2018-05-01' -> 20180501.
    cycle_lis = sorted(records_by_cycle,
                       key=lambda c: int(''.join(c.split('-'))))
    for add_time_desc in cycle_lis:
        chat_records = records_by_cycle[add_time_desc]
        chat_records_click = clicks_by_cycle.get(add_time_desc, set())
        ask_count = len(chat_records)  # de-duplicated ask count
        click_num = len(chat_records_click)  # de-duplicated click count
        try:
            ask_count_person = count_persons(chat_records)
            click_num_person = count_persons(chat_records_click)
        except Exception as e:
            print(e)
            # Skip this cycle rather than importing unbound or stale
            # per-person counts left over from a previous cycle.
            continue

        add_time = getEveryCycleLastDay(add_time_desc, statistics_type)
        click_rate = round(click_num * 100 / float(ask_count), 2)
        data_dic = {
            'store_id': store_id,
            'store_name': store_name,
            'statistics_type': statistics_type,
            'ask_count': ask_count,
            'ask_count_person': ask_count_person,
            'click_num': click_num,
            'click_num_person': click_num_person,
            'click_rate': click_rate,
            'add_time': add_time,
            'mall_id': mall_id,
            'add_time_desc': add_time_desc
        }
        dataimport(data_dic, 'data_analysis_storecumulate')
def main():
    """Run the daily interaction analysis for every WeChat account of a mall.

    The earliest ``ref_date`` of the selected placeholder rows in
    ``data_analysis_upstreammsgday`` is the start date per account;
    accounts without such a row are scheduled with no start date.  Each
    range accepted by ``isFullCycle`` is passed to ``streammsg_analysis``.
    """
    print('互动量分析(商城为单位) data_analysis_upstreammsgday')
    sql_all = "SELECT wechat_account_id,min(ref_date) as last_time FROM django_aip.data_analysis_upstreammsgday where msg_type =6 and ref_date>'2018-03-12' and msg_total_count=0 group by wechat_account_id"
    sql_mall = "select mall_id,id as wechat_account_id,user_name from django_aip.third_part_wechat_wechataccount where mall_id<>''"
    cursor.execute(sql_all)
    # Copy into a list: rows are appended below and an empty result may be
    # an immutable tuple.
    res_alltime = list(cursor.fetchall())
    cursor.execute(sql_mall)
    res_malls = cursor.fetchall()
    wechat_accounts = [row['wechat_account_id'] for row in res_malls]
    mall_wechat_dic = {
        row['wechat_account_id']: row['mall_id']
        for row in res_malls
    }
    mall_username_dic = {
        row['wechat_account_id']: row['user_name']
        for row in res_malls
    }
    # Only the daily cycle is enabled here.
    statistics_dic = {5: '天'}

    # Schedule accounts that have no row yet.  The previous code assigned
    # the result of list.sort() (always None), so they were never added.
    known_accounts = set(row['wechat_account_id'] for row in res_alltime)
    for account_id in sorted(set(wechat_accounts) - known_accounts):
        res_alltime.append({'wechat_account_id': account_id,
                            'last_time': None})
    for row in res_alltime:
        row['statistics_type'] = 5

    for row in res_alltime:
        statistics_type = row['statistics_type']
        cycle = statistics_dic.get(statistics_type)
        start_date = row.get('last_time')
        if statistics_type == 2:
            end_date = getFirstDayOfCurYear()
        elif statistics_type == 3:
            end_date = getFirstDayOfCurMonth()
        elif statistics_type == 4:
            end_date = getFirstDayOfCurWeek()
        elif statistics_type == 5:
            end_date = getCurDay()
        else:
            continue
        wechat_account_id = row['wechat_account_id']
        mall_id = mall_wechat_dic.get(wechat_account_id)
        to_username = mall_username_dic.get(wechat_account_id)
        if isFullCycle(start_date, end_date, cycle):
            print('统计周期:%s 统计日期范围:%s--%s 统计商城id:%d ' %
                  (cycle, start_date, end_date, wechat_account_id))
            streammsg_analysis(cycle=cycle,
                               at_id=None,
                               mall_id=mall_id,
                               start_date=start_date,
                               end_date=end_date,
                               wechat_account_id=wechat_account_id,
                               to_username=to_username)
def statistic(store_lis=None, func=None, start=None, end=None):
    """Feed each store's interaction rows to one of the praise aggregators.

    Args:
        store_lis: optional sequence of shop ids; all stores when empty.
        func: aggregator; only functions named ``praise_statistics1`` or
            ``praise_statistics2`` are handled, anything else returns
            immediately.
        start: overwritten per store by the last date already stored for
            that shop, so the passed-in value is not used.
        end: optional exclusive upper bound (``%Y-%m-%d``) on the operation
            date.
    """
    handler_name = func.__name__
    if handler_name == 'praise_statistics1':
        print(u'统计每天互动概况')
        cursor.execute("select max(add_time_desc) as start,kdt_id "
                       "from data_analysis_praisestatistics GROUP BY kdt_id")
    elif handler_name == 'praise_statistics2':
        print(u'统计每人互动概况')
        cursor.execute(
            "select DATE_FORMAT(max(date_sub(add_time,INTERVAL 1 DAY)),%s) as start,kdt_id "
            "from data_analysis_praiseuserstatistics GROUP BY kdt_id",
            ['%Y-%m-%d'])
    else:
        return
    dateinfo = cursor.fetchall()
    # Last stored date per shop id (string key).
    datedic = {str(row['kdt_id']): row['start'] for row in (dateinfo or {})}

    if store_lis:
        cursor.execute(
            "select sid,`name` from third_part_wechat_praisestore where sid in %s",
            [store_lis])
    else:
        cursor.execute("select sid,`name` from third_part_wechat_praisestore ")
    store_lis = cursor.fetchall()

    day_format = '%Y-%m-%d'
    for store_info in store_lis:
        add_time = datetime.datetime.now().strftime(day_format)
        store_name = store_info["name"]
        kdt_id = store_id = store_info["sid"]
        start = datedic.get(str(store_id))

        detail_sql = "select id,reply_type_id,from_wechat_user_id, operation_type,from_wechat_user_id," \
                     "DATE_FORMAT(operation_time,%s) as cycle,DATE_FORMAT(add_time,%s) as last_time,operation_time " \
                     "from v_praise_statistic " \
                     "where store_id =%s "
        detail_args = [day_format, '%Y-%m-%d %H:%i:%s', store_id]
        if start:
            detail_sql += "and DATE_FORMAT(operation_time,%s)> %s "
            detail_args.extend([day_format, start])
        if end:
            detail_sql += "and DATE_FORMAT(operation_time,%s)< %s "
            detail_args.extend([day_format, end])
        cursor.execute(detail_sql, detail_args)
        res = cursor.fetchall()

        # The cycle list is only queried when there are detail rows.
        cycle_lis = []
        if res:
            cycle_sql = "select id,from_wechat_user_id,DATE_FORMAT(operation_time,%s) as add_time_desc," \
                        "DATE_FORMAT(operation_time,%s) as cycle from v_praise_statistic where store_id=%s "
            cycle_args = ['%Y-%m-%d %H:%i:%s', day_format, kdt_id]
            if start:
                cycle_sql += "and DATE_FORMAT(operation_time,%s)>%s"
                cycle_args.extend([day_format, start])
            if end:
                cycle_sql += "and DATE_FORMAT(operation_time,%s)<%s"
                cycle_args.extend([day_format, end])
            cursor.execute(cycle_sql, cycle_args)
            cycle_lis = cursor.fetchall()

        func(res, add_time, store_name, kdt_id, cycle_lis, start, end)
def handerData(self, update_lis, onsale_lis, delete_lis, token, sid):
    """Refresh goods in MySQL and mirror the result into Elasticsearch.

    Args:
        update_lis: item ids whose details are fetched from ``self.item``
            and inserted/updated in ``third_part_wechat_praisegoods``.
        onsale_lis: item ids currently on sale; these are flagged
            ``is_display=1`` and all other goods of the shop ``0``.
        delete_lis: item ids to remove from the ``praise_es`` index.
        token: access token for the item API.
        sid: shop id (the visible caller passes ``self.sid``).
    """
    batch_size = 20  # documents per Elasticsearch bulk request

    def bulk_index(rows, verbose=False):
        # Index rows into praise_es in batches, keyed by item_id.
        for row in rows:
            row['_id'] = row['item_id']
        for offset in range(0, len(rows), batch_size):
            ok = helpers.bulk(client,
                              actions=rows[offset:offset + batch_size],
                              index=praise_es,
                              doc_type="goods")
            if verbose:
                print(ok)

    err_items = []
    params = []
    for item_id in update_lis:
        # Per-item try: one failing request no longer aborts the rest.
        try:
            payload = {"access_token": token, "item_id": item_id}
            r_item = requests.get(self.item, params=payload)
            item = r_item.json().get('response').get('item')
            created_time = item.get('created')
            tag_ids = item.get('tag_ids')
            # "[1, 2]" -> "1, 2"; a missing list used to become 'on'
            # (str(None)[1:-1]).
            item_tags = str(tag_ids)[1:-1] if tag_ids is not None else ''
            params.append([
                item_id,
                item.get('title'),
                item.get('price'),
                item.get('item_type'),
                item.get('sold_num'),
                item.get('detail_url'),
                item.get('quantity'),
                item.get('post_fee'),
                item.get('pic_url'),
                created_time,
                created_time,  # update_time mirrors created, as before
                item.get('alias'),
                item.get('post_type'),
                item.get('kdt_id'),
                1,  # is_display
                item.get('cid'),
                item.get('item_no'),
                item_tags,
                3,  # ordering_type
            ])
        except Exception:
            err_items.append(item_id)
    print(err_items)
    print("更新请求出错:%d条" % (len(err_items)))

    if params:
        # The former statements had no placeholders (INSERT) and invalid
        # syntax (UPDATE ... VALUES without WHERE), so neither could run.
        sql1 = "insert into third_part_wechat_praisegoods" \
               "(item_id,title,price,item_type,sold_num,detail_url," \
               "quantity,post_fee,picture,created_time, update_time," \
               "alias,post_type,kdt_id,is_display,cid, item_no,item_tags," \
               "ordering_type)" \
               "VALUES (" + ",".join(["%s"] * 19) + ")"
        sql2 = "update third_part_wechat_praisegoods set " \
               "title=%s,price=%s,item_type=%s,sold_num=%s,detail_url=%s," \
               "quantity=%s,post_fee=%s,picture=%s,created_time=%s,update_time=%s," \
               "alias=%s,post_type=%s,kdt_id=%s,is_display=%s,cid=%s,item_no=%s," \
               "item_tags=%s,ordering_type=%s " \
               "where item_id=%s"
        for iteminfo in params:
            cursor.execute(
                "select id from third_part_wechat_praisegoods where item_id=%s",
                [iteminfo[0]])
            if cursor.fetchone():
                # item_id moves to the end for the WHERE clause.
                cursor.execute(sql2, iteminfo[1:] + [iteminfo[0]])
            else:
                cursor.execute(sql1, iteminfo)
            conn.commit()

    if onsale_lis:
        cursor.execute(
            "update third_part_wechat_praisegoods set is_display=%s "
            "where item_id in %s and kdt_id=%s", [1, onsale_lis, self.sid])
        conn.commit()
        cursor.execute(
            "update third_part_wechat_praisegoods set is_display=%s "
            "where item_id not in %s and kdt_id=%s", [0, onsale_lis, self.sid])
        conn.commit()

    # Import into Elasticsearch everything of the shop that is not deleted.
    if delete_lis:
        cursor.execute(
            "select * from v_praisegoods "
            "where kdt_id=%s and item_id not in %s", [self.sid, delete_lis])
    else:
        cursor.execute("select * from v_praisegoods "
                       "where kdt_id=%s ", [self.sid])
    bulk_index(cursor.fetchall())

    # Deletion.
    if delete_lis:
        if sid == 19235176:
            # NOTE(review): this one shop re-indexes its "deleted" goods
            # instead of removing them; the reason is not visible here.
            cursor.execute("select * from v_praisegoods where item_id in %s",
                           [delete_lis])
            bulk_index(cursor.fetchall(), verbose=True)
        else:
            Search(using=client, index=praise_es, doc_type="goods")\
                .filter("terms", item_id=delete_lis).delete()
def tag_analysis(cycle=None,
                 at_id=None,
                 mall_id=None,
                 start_date=None,
                 end_date=None):
    """Collect, per tag, the article operations of a mall and pass them on.

    Args:
        cycle: statistics period label (key of the lookup tables below).
        at_id: optional material-type ids, used with ``at_id in %s``.
        mall_id: optional mall id; its stores restrict the articles.
        start_date: optional exclusive lower bound on ``operation_time``.
        end_date: optional exclusive upper bound on ``operation_time``.

    For every tag that has matching operation rows, ``analysis_one_set`` is
    called with the rows, the tag name/type, the mall id, the numeric
    statistics type and the tag id.
    """
    statistics_dic = {'总计': 1, '年': 2, '月': 3, '周': 4, '天': 5}
    date_format_dic = {'年': '%Y', '月': '%Y-%m', '周': '%x-%v', '天': '%Y-%m-%d'}
    fromat_style = date_format_dic.get(cycle)
    statistics_type = statistics_dic.get(cycle)

    conditions = []
    params3 = []
    if at_id:
        conditions.append('at_id in %s ')
        params3.append(at_id)
    if mall_id:
        cursor.execute(
            'select id from third_part_wechat_store where mall_id= %s',
            [mall_id])
        stores = [row['id'] for row in cursor.fetchall()]
        if not stores:
            # A mall without stores has no articles.  Previously, with
            # at_id set, the query fell through to the articles of all
            # malls.
            print('mall is not data')
            return
        conditions.append('store_id in %s')
        params3.append(stores)
    if not params3:
        print('mall is not data')
        return

    sql3 = 'select id from third_part_wechat_articleinformation '
    sql3 += 'where ' + 'and '.join(conditions)
    cursor.execute(sql3, params3)
    artcle_lis = [row['id'] for row in cursor.fetchall()]
    if not artcle_lis:
        return

    # Existence probe only: one row is enough, no need to fetch them all.
    cursor.execute(
        'select 1 from third_part_wechat_chatrecordarticle '
        'where article_id in %s limit 1', [artcle_lis])
    if not cursor.fetchone():
        return

    sql5 = "select chat_record_id,operation_type,DATE_FORMAT(operation_time,%s) as cycle," \
           "unix_timestamp(operation_time) as time_zone " \
           "from third_part_wechat_chatrecordarticle " \
           "where chat_record_id in %s and article_id in %s "
    end_params5 = []
    if start_date:
        sql5 += 'and operation_time > %s '
        end_params5.append(start_date)
    if end_date:
        sql5 += 'and operation_time < %s '
        end_params5.append(end_date)
    sql5 += 'ORDER BY cycle '

    cursor.execute(
        'select distinct(tag_id) as tag_id from third_part_wechat_chatrecordtag')
    tags = cursor.fetchall()
    sql4 = 'select distinct(chat_record_id) as chat_record_id from third_part_wechat_chatrecordtag where tag_id=%s'
    sql6 = 'select tag_name,tag_type from third_part_wechat_tags where id =%s'
    for tag in tags:
        tag_id = tag['tag_id']
        cursor.execute(sql4, [tag_id])
        chat_records = [row['chat_record_id'] for row in cursor.fetchall()]
        if not chat_records:
            # "chat_record_id in ()" would be a SQL error.
            continue
        cursor.execute(sql5,
                       [fromat_style, chat_records, artcle_lis] + end_params5)
        res = cursor.fetchall()
        if not res:
            continue
        cursor.execute(sql6, [tag_id])
        # Tolerate a tag id without a row in the tags table.
        tag_info = cursor.fetchone() or {}
        analysis_one_set(res, tag_info.get('tag_name'),
                         tag_info.get('tag_type'), mall_id, statistics_type,
                         tag_id)
def django_aip_es(es_mall_lis):
    """Index the new articles of the given malls into ``django_aip_es``.

    For every mall id in ``es_mall_lis`` the highest ``article_id`` already
    in the index is looked up, newer articles with ``at_id`` 1 or 4 are
    bulk-indexed in batches together with their tags and store name, and
    ``raw_content`` of the processed id range is then set to NULL.
    """
    batch_size = 20  # documents per bulk request
    begin_date = datetime.datetime.strftime(datetime.datetime.now(),
                                            "%Y-%m-%d")
    cursor.execute("select id,chs_name from third_part_wechat_mall")
    mall_dic = {row["id"]: row["chs_name"] for row in cursor.fetchall()}

    for malls_id in es_mall_lis:
        malls = mall_dic.get(malls_id)
        s = Search(using=client, index="django_aip_es")\
            .filter("term", malls_id=malls_id)\
            .sort("-article_id")[:1]
        response = s.execute()
        max_article_id = response[0]["article_id"] if response else 0
        print("in es max article_id is %d" % (max_article_id))

        sql = "select * from third_part_wechat_articleinformation where id>%s and mall_id=%s and at_id in %s order by id"
        cursor.execute(sql, [max_article_id, malls_id, [1, 4]])
        res_all = cursor.fetchall()
        print(u"%d %s %d条新数据" % (malls_id, malls, len(res_all)))
        if not res_all:
            continue

        # Rows are ordered by id, so the last one bounds the range; kept
        # per mall so a value from a previous mall can never leak in.
        article_id_end = res_all[-1]["id"]

        for offset in range(0, len(res_all), batch_size):
            action_lis = []
            for action in res_all[offset:offset + batch_size]:
                cursor.execute(
                    "select tag_name,chs_name,eng_name from v_article_tag_store where articleinformation_id=%s",
                    [action["id"]])
                tag_rows = cursor.fetchall()
                if tag_rows:
                    stores = tag_rows[0].get("chs_name") or tag_rows[0].get(
                        "eng_name")
                    tags = [row["tag_name"] for row in tag_rows]
                else:
                    # No tags: read the store name from the store table.
                    tags = []
                    cursor.execute(
                        "select chs_name,eng_name from third_part_wechat_store where id= %s",
                        [action["store_id"]])
                    # A missing store row gives no name instead of a crash.
                    store_row = cursor.fetchone() or {}
                    stores = store_row.get("chs_name") or store_row.get(
                        "eng_name")
                action_lis.append({
                    "article_id": action["id"],
                    "title": action["title"],
                    "thumb_url": action["thumb_url"],
                    "url": action["url"],
                    "tags": tags,
                    "send_date": action["send_date"].strftime("%Y-%m-%d"),
                    "begin_date": begin_date,
                    "malls": malls,
                    "malls_id": malls_id,
                    "stores": stores,
                    "stores_id": action["store_id"],
                    "article_type": action["at_id"]
                })
            # Bulk import into Elasticsearch.
            ok, err = helpers.bulk(client,
                                   actions=action_lis,
                                   index="django_aip_es",
                                   doc_type="article")
            print(ok, err)

        print(max_article_id, article_id_end)
        # Clear raw_content of the id range that was just processed.
        update_at_sql = "update third_part_wechat_articleinformation set raw_content=%s where mall_id=%s and id>%s and id<=%s "
        cursor.execute(update_at_sql,
                       [None, malls_id, max_article_id, article_id_end])
        conn.commit()
def praise_statistics2(res, add_time, store_name, kdt_id, cycle_lis, start,
                       end):
    """Build per-user interaction counters for one shop and store them.

    Args:
        res: operation rows (``id``, ``reply_type_id``, ``operation_type``,
            ``from_wechat_user_id``, ``last_time``).
        add_time: value written to every result row's ``add_time``.
        store_name, kdt_id: shop name and id copied into every result row.
        cycle_lis: rows (``id``, ``from_wechat_user_id``, ``add_time_desc``)
            that seed one entry per user.
        start, end: optional exclusive ``%Y-%m-%d`` bounds used when looking
            for chat records that have no operation row.

    Reply type 8 feeds the ``hf_*`` counters and reply type 7 the
    ``wechat_*`` counters.  Results go to ``dataupdate1`` ordered by
    ``add_time_desc``.
    """
    print(u"统计 %s 中,店铺ID: %d 周期 :天" % (store_name, kdt_id))

    def new_entry(user_id, last_chat):
        # Fresh counter record for one user.
        return {
            "from_wechat_user_id": user_id,
            "add_time_desc": last_chat,  # time of the last chat
            "add_time": add_time,
            "store_name": store_name,
            "kdt_id": kdt_id,
            "statistics_type": 5,
            "wechat_msg_count": [],
            "hf_msg_count": [],
            "wechat_click_count": 0,
            "hf_click_count": 0
        }

    def record_message(entry, reply_type_id, record_id):
        # Remember the record id under the channel it belongs to.
        if reply_type_id == 8:
            entry["hf_msg_count"].append(record_id)
        elif reply_type_id == 7:
            entry["wechat_msg_count"].append(record_id)

    # First occurrence fixes the user id, last occurrence the timestamp.
    seed_by_user = {}
    for info in cycle_lis:
        user_key = str(info["from_wechat_user_id"])
        if seed_by_user.get(user_key):
            seed_by_user[user_key]['add_time_desc'] = info["add_time_desc"]
        else:
            seed_by_user[user_key] = {
                "add_time_desc": info["add_time_desc"],
                "from_wechat_user_id": info["from_wechat_user_id"]
            }
    cycle_infos = {
        user_key: new_entry(seed["from_wechat_user_id"],
                            seed['add_time_desc'])
        for user_key, seed in seed_by_user.items()
    }

    for action in res:
        entry = cycle_infos[str(action["from_wechat_user_id"])]
        reply_type_id = action["reply_type_id"]
        if action['last_time'] > entry['add_time_desc']:
            entry['add_time_desc'] = action['last_time']
        if reply_type_id == 8:
            if action["operation_type"] > 0:
                entry["hf_click_count"] += 1
        elif reply_type_id == 7:
            if action["operation_type"] > 0:
                entry["wechat_click_count"] += 1
        record_message(entry, reply_type_id, action["id"])

    # Tidy up before storing: add chat records absent from cycle_lis.
    record_sql = "select DISTINCT(id) as id,add_time from third_part_wechat_praisechatrecord where store_id=%s "
    record_args = [kdt_id]
    if start:
        record_sql += "and DATE_FORMAT(add_time,%s)>%s "
        record_args += ['%Y-%m-%d', start]
    if end:
        record_sql += "and DATE_FORMAT(add_time,%s)<%s "
        record_args += ['%Y-%m-%d', end]
    cursor.execute(record_sql, record_args)
    all_ids = set([row['id'] for row in cursor.fetchall()])
    seen_ids = set([row['id'] for row in cycle_lis])
    ids = list(all_ids - seen_ids)
    if ids:
        extra_sql = "select id,reply_type_id,from_wechat_user_id,DATE_FORMAT(add_time,%s) as last_time,add_time " \
                    "from third_part_wechat_praisechatrecord " \
                    "where id in %s "
        cursor.execute(extra_sql, ['%Y-%m-%d %H:%i:%s', ids])
        for action in cursor.fetchall():
            user_key = str(action["from_wechat_user_id"])
            last_time = action['last_time']
            if not cycle_infos.get(user_key):
                cycle_infos[user_key] = new_entry(
                    action["from_wechat_user_id"], last_time)
            entry = cycle_infos[user_key]
            record_message(entry, action["reply_type_id"], action['id'])
            if last_time > entry["add_time_desc"]:
                entry["add_time_desc"] = last_time

    statistics_result = list(cycle_infos.values())
    # Message counters become the number of distinct record ids.
    for cycle_info in statistics_result:
        cycle_info['hf_msg_count'] = len(set(cycle_info['hf_msg_count']))
        cycle_info['wechat_msg_count'] = len(
            set(cycle_info['wechat_msg_count']))
    statistics_result.sort(key=lambda entry: entry["add_time_desc"])
    for cycle_info in statistics_result:
        dataupdate1(cycle_info, "data_analysis_praiseuserstatistics")
def praise_statistics1(res, add_time, store_name, kdt_id, cycle_lis, start,
                       end):
    """Build per-day interaction counters for one shop and store them.

    Args:
        res: operation rows (``reply_type_id``, ``operation_type``,
            ``cycle``, ``from_wechat_user_id``).
        add_time: value written to every result row's ``add_time``.
        store_name, kdt_id: shop name and id copied into every result row.
        cycle_lis: rows (``id``, ``cycle``) that seed one entry per day.
        start, end: optional exclusive ``%Y-%m-%d`` bounds used when looking
            for chat records that have no operation row.

    Reply type 8 feeds the ``hf_*`` counters and reply type 7 the
    ``wechat_*`` counters; ``operation_type`` 0 counts as a push, anything
    else as a click.  Results go to ``dataimport`` ordered by day.
    """
    print(u"统计 %s 中,店铺ID: %d 周期 :天" % (store_name, kdt_id))

    def new_entry(day):
        # Fresh counter record for one day.
        return {
            "add_time_desc": day,
            "add_time": add_time,
            "store_name": store_name,
            "kdt_id": kdt_id,
            "statistics_type": 5,
            "hf_visit_count": [],
            "wechat_visit_count": [],
            "hf_push_count": 0,
            "wechat_push_count": 0,
            "hf_click_count": 0,
            "wechat_click_count": 0
        }

    # Counter-name prefix per reply type.
    prefix_by_reply = {8: 'hf', 7: 'wechat'}

    days = sorted(set([row["cycle"] for row in cycle_lis]))
    cycle_infos = {day: new_entry(day) for day in days}

    for action in res:
        prefix = prefix_by_reply.get(action["reply_type_id"])
        operation_type = action["operation_type"]
        day = action['cycle']
        if prefix is None:
            continue
        entry = cycle_infos[day]
        if operation_type == 0:
            entry[prefix + '_push_count'] += 1
        else:
            entry[prefix + '_click_count'] += 1
        entry[prefix + '_visit_count'].append(action['from_wechat_user_id'])

    # Tidy up before storing: add chat records absent from cycle_lis.
    record_sql = "select DISTINCT(id) as id,add_time from third_part_wechat_praisechatrecord where store_id=%s "
    record_args = [kdt_id]
    if start:
        record_sql += "and DATE_FORMAT(add_time,%s)>%s "
        record_args += ['%Y-%m-%d', start]
    if end:
        record_sql += "and DATE_FORMAT(add_time,%s)<%s "
        record_args += ['%Y-%m-%d', end]
    cursor.execute(record_sql, record_args)
    all_ids = set([row['id'] for row in cursor.fetchall()])
    seen_ids = set([row['id'] for row in cycle_lis])
    ids = list(all_ids - seen_ids)
    if ids:
        extra_sql = "select reply_type_id,from_wechat_user_id,DATE_FORMAT(add_time,%s) as add_time_desc,add_time " \
                    "from third_part_wechat_praisechatrecord " \
                    "where id in %s "
        cursor.execute(extra_sql, ['%Y-%m-%d', ids])
        for action in cursor.fetchall():
            day = action['add_time_desc']
            if not cycle_infos.get(day):
                cycle_infos[day] = new_entry(day)
            prefix = prefix_by_reply.get(action['reply_type_id'])
            if prefix is not None:
                cycle_infos[day][prefix + '_visit_count'].append(
                    action['from_wechat_user_id'])

    # Visit counters become the number of distinct users.
    for cycle_info in cycle_infos.values():
        cycle_info['wechat_visit_count'] = len(
            set(cycle_info['wechat_visit_count']))
        cycle_info['hf_visit_count'] = len(set(cycle_info['hf_visit_count']))

    statistics_result = list(cycle_infos.values())
    statistics_result.sort(key=lambda entry: entry["add_time_desc"])
    for cycle_info in statistics_result:
        dataimport(cycle_info, "data_analysis_praisestatistics")