def extract_info(x):
    """Flatten a JSON payload of statuses into a list of row lists.

    ``x`` is a JSON string. Its ``statuses`` entry is expected to be a list
    of status dicts; the optional top-level ``sf_brand`` and ``sf_industry``
    values are appended to every row.

    Returns the list of rows built so far. Any error (invalid JSON, missing
    ``statuses`` key, a failure while building a row) is printed and stops
    processing; rows completed before the error are still returned, so an
    unusable payload yields an empty list.
    """
    res = list()
    try:
        d_t = json.loads(x)
        brand = d_t.get('sf_brand')
        industry = d_t.get('sf_industry')
        d = d_t['statuses']
        if d and isinstance(d, list):
            for it in d:
                text = it.get('text')
                if_ad = setiment.if_ad(text)
                st = setiment.checking_sentiment(text)
                if if_ad:
                    # Ads carry no sentiment: use neutral placeholders.
                    st = ('', 0, '', 0, 0)
                # A status without a user or source must not abort the
                # whole batch; fall back to empty values instead.
                user = it.get('user') or {}
                source = it.get('source') or ''
                t = [
                    str(it.get('id')),
                    time_format(it.get('created_at')),
                    it.get('mid'),
                    text,
                    re.sub(re_sub_p, '', source),
                    it.get('original_pic'),
                    it.get('reposts_count'),
                    it.get('comments_count'),
                    it.get('goods_count'),
                    it.get('forward'),
                    user.get('id'),
                    user.get('screen_name'),
                    user.get('gender'),
                    user.get('verified'),
                    user.get('profile_url'),
                    user.get('followers_count'),
                    user.get('friends_count'),
                    user.get('statuses_count'),
                    user.get('favourites_count'),
                    user.get('url'),
                    time_format(user.get('created_at')),
                    # NOTE(review): url is emitted a second time here; kept
                    # so the column layout stays unchanged.
                    user.get('url'),
                    user.get('province'),
                    user.get('city'),
                    user.get('description'),
                    '',  # weibo_type
                    '',  # weibo_type1
                    abs(if_ad - 1),  # is_effect: flips a 0/1 ad flag
                    st[2],
                    st[3],
                    st[0],
                    st[1],
                    st[4],  # is_neg
                    brand,
                    industry,
                ]
                res.append(t)
    except Exception as e:
        # Best-effort parsing: report the problem and keep what was built.
        # getattr keeps the Python 2 output (e.message) while still working
        # on interpreters where exceptions have no ``message`` attribute.
        print(getattr(e, 'message', e))
    return res
re_sub_p = re.compile(u'回复|#.+?#|@.+?[\s::]|\[.+?\]|@.+$|\s+?') res = list() def extract_info(x): try: d_t = json.loads(x) d = d_t['comments'] except Exception, e: # raise ValueError('No comments exists!') return None if isinstance(d, list): for it in d: s = it.get("content").replace('\n', '') st = setiment.checking_sentiment(s) t = [#it.get("id"), it.get("content").replace('\n', ''), it.get("creationTime"), it.get("referenceName"), it.get("referenceTime"), it.get("referenceType"), it.get("replyCount"), it.get("score"), it.get("status"), it.get("usefulVoteCount"), it.get("uselessVoteCount"), it.get("userLevelId"), it.get("userProvince"), it.get("userRegisterTime"), it.get("viewCount"),