Пример #1
0
def extract_info(x):
    """Flatten the statuses of one JSON payload into a list of rows.

    Args:
        x: JSON text.  It must decode to an object with a ``statuses`` key;
            ``sf_brand`` and ``sf_industry`` are optional and are copied
            onto every row.

    Returns:
        A list with one row (a flat list of fields) per entry of
        ``statuses``.  The list is empty when ``statuses`` is empty or is
        not a list.  If anything raises while decoding or building rows,
        the error is printed and the rows collected before the failure are
        returned.
    """
    res = []
    try:
        d_t = json.loads(x)
        brand = d_t.get('sf_brand')
        industry = d_t.get('sf_industry')
        statuses = d_t['statuses']
        if statuses and isinstance(statuses, list):
            for it in statuses:
                text = it.get('text')
                if_ad = setiment.if_ad(text)
                st = setiment.checking_sentiment(text)
                if if_ad:
                    # Flagged as an ad: replace the sentiment result with
                    # an empty placeholder of the same shape.
                    st = ('', 0, '', 0, 0)
                # Looked up once; a status without a user still fails at
                # the first user field, as it did before.
                user = it.get('user')
                res.append([
                    str(it.get('id')),
                    time_format(it.get('created_at')),
                    it.get('mid'),
                    text,
                    re.sub(re_sub_p, '', it.get('source')),
                    it.get('original_pic'),
                    it.get('reposts_count'),
                    it.get('comments_count'),
                    it.get('goods_count'),
                    it.get('forward'),
                    user.get('id'),
                    user.get('screen_name'),
                    user.get('gender'),
                    user.get('verified'),
                    user.get('profile_url'),
                    user.get('followers_count'),
                    user.get('friends_count'),
                    user.get('statuses_count'),
                    user.get('favourites_count'),
                    user.get('url'),
                    time_format(user.get('created_at')),
                    # NOTE(review): 'url' is emitted a second time here;
                    # kept so the column layout is unchanged -- confirm
                    # whether another field was intended.
                    user.get('url'),
                    user.get('province'),
                    user.get('city'),
                    user.get('description'),
                    '',  # weibo_type
                    '',  # weibo_type1
                    # is_effect: 0 when if_ad is 1/True, 1 when it is
                    # 0/False (presumably if_ad returns a 0/1 flag).
                    abs(if_ad - 1),
                    st[2],
                    st[3],
                    st[0],
                    st[1],
                    st[4],  # is_neg
                    brand,
                    industry,
                ])
    except Exception as e:
        # Best-effort: report the problem and keep what was collected.
        # ``message`` only exists on Python 2 exceptions, so fall back to
        # the exception object itself elsewhere.
        print(getattr(e, 'message', e))
    # The rows were previously built and then dropped (implicit None).
    return res
Пример #2
0
# Pre-compiled pattern matching, in order of preference:
#   - the literal marker "回复" ("reply"),
#   - a "#...#" span,
#   - "@" up to the first whitespace or colon (ASCII or full-width),
#   - a "[...]" span,
#   - "@" through the end of the string,
#   - a single whitespace character.
re_sub_p = re.compile(
    u'回复|#.+?#|@.+?[\s::]|\[.+?\]|@.+$|\s+?'
)

# Module-level list, empty at import time.
res = []


def extract_info(x):
    try:
        d_t = json.loads(x)
        d = d_t['comments']
    except Exception, e:
        # raise ValueError('No comments exists!')
        return None
    if isinstance(d, list):
        for it in d:
            s = it.get("content").replace('\n', '')
            st = setiment.checking_sentiment(s)
            t = [#it.get("id"),
                 it.get("content").replace('\n', ''),
                 it.get("creationTime"),
                 it.get("referenceName"),
                 it.get("referenceTime"),
                 it.get("referenceType"),
                 it.get("replyCount"),
                 it.get("score"),
                 it.get("status"),
                 it.get("usefulVoteCount"),
                 it.get("uselessVoteCount"),
                 it.get("userLevelId"),
                 it.get("userProvince"),
                 it.get("userRegisterTime"),
                 it.get("viewCount"),