示例#1
0
def saveBlogs(user_name, blogs, offset):
    if blogs:
        pass
    else:
        return
    for blog in blogs:
        blog['created_date'] = time_bz.timestampToDateTime(blog['timestamp'])
        del blog['timestamp']
        blog['id_str'] = blog['id']
        del blog['id']

        del blog['date']
        del blog['recommended_source']
        del blog['recommended_color']
        del blog['highlighted']
        blog['user_name'] = user_name

        blog['tags'] = json.dumps(blog.get('tags'))
        blog['reblog'] = json.dumps(blog.get('reblog'))
        blog['trail'] = json.dumps(blog.get('trail'))
        blog['photos'] = json.dumps(blog.get('photos'))
        blog['post_author'] = json.dumps(blog.get('post_author'))
        blog['player'] = json.dumps(blog.get('player'))
        blog['dialogue'] = json.dumps(blog.get('dialogue'))

        result = pg.insertIfNotExist(pg, 'tumblr_blog', blog, "id_str='%s'" % blog['id_str'])
        if result is None:  # 有重复记录了,就不再继续了
            print 'have some data'
            return
        else:
            print 'new ', blog['id_str'], blog['type'], 'offset:', offset
    # 继续取
    new_offset = offset + 20
    new_blogs = callGetMeidaApi(user_name, offset=new_offset)['response']['posts']
    saveBlogs(user_name, new_blogs, new_offset)
示例#2
0
def saveBlogs(user_name, blogs, offset):
    if blogs:
        pass
    else:
        return
    for blog in blogs:

        m = public_bz.storage()
        m.id_str = blog['id']
        m.m_type = 'tumblr'
        m.m_user_id = user_name
        m.created_at = time_bz.timestampToDateTime(blog['timestamp'])
        m.extended_entities = json.dumps(blog.get('photos'))
        m.content = None
        m.text = blog.get('caption')
        m.href = blog.get('short_url')
        m.type = blog.get('type')

        result = pg.insertIfNotExist(pg, 'm', m, "id_str='%s' and m_type='tumblr' " % m.id_str)
        if result is None:  # 有重复记录了,就不再继续了
            print 'have same data'
            return
        else:
            print 'new ', m.id_str, m.type, 'offset:', offset, 'name:', user_name
    # 继续取
    new_offset = offset + 20
    new_blogs = callGetMeidaApi(user_name, offset=new_offset)['response']['posts']
    saveBlogs(user_name, new_blogs, new_offset)
示例#3
0
def saveMedias(user, medias):
    '''
    create by bigzhu at 15/09/04 20:58:54 保存meedias

         "attribution":null,
         "tags":[  ],
         "type":"image",
         "location":{  },
         "comments":{  },
         "filter":"Normal",
         "created_time":"1441362020",
         "link":"https:\/\/instagram.com\/p\/7NIHiLJJs3\/",
         "likes":{  },
         "images":{  },
         "users_in_photo":[  ],
         "caption":{  },
         "user_has_liked":false,
         "id":"1066544388859271991_262341",
         "user":{  }
    '''

    for media_d in medias['data']:
        media = storage(media_d)
        db_media = storage()

        #db_media.attribution = media.attribution
        #db_media.tags = json.dumps(media.tags, cls=public_bz.ExtEncoder)
        db_media.type = media.type
        #db_media.location = json.dumps(media.location, cls=public_bz.ExtEncoder)
        db_media.comments = json.dumps(media.comments, cls=public_bz.ExtEncoder)
        db_media.filter = media.filter
        #db_media.created_time = time_bz.timestampToDateTime(media.created_time) + timedelta(hours=8)
        db_media.created_time = time_bz.timestampToDateTime(media.created_time)
        db_media.link = media.link
        #db_media.likes = json.dumps(media.likes, cls=public_bz.ExtEncoder)
        db_media.low_resolution = json.dumps(media.images['low_resolution'])
        db_media.standard_resolution = json.dumps(media.images['standard_resolution'])
        db_media.thumbnail = json.dumps(media.images['thumbnail'])
        #db_media.users_in_photo = json.dumps(media.users_in_photo, cls=public_bz.ExtEncoder)
        if media.caption:
            caption = media.caption
            caption['user_id'] = caption['from']['id']
            del caption['from']
        else:
            caption = ''
        db_media.caption = json.dumps(caption, cls=public_bz.ExtEncoder)

        db_media.id_str = media.id
        db_media.user_id = user.id

        id = pg.insertIfNotExist(pg, 'instagram_media', db_media, "id_str='%s'" % db_media.id_str)
        if id is None:
            raise Exception('重复记录 id=%s, name=%s' % (media.id, user.username))
        else:
            print 'new=', media.id, user.username
        if id is not None and len(medias) <= 2:  # 新增加消息,微信通知只通知2条以内
            openids = public_db.getOpenidsByName('instagram', user.username)
            for data in openids:
                if caption != '':
                    text = caption.get('text')
                else:
                    text = ''
                wechat_oper.sendInstagram(data.openid, text, media.images['low_resolution']['url'], user.username, id)

    if medias['pagination']:
        next_url = medias['pagination']['next_url']
        medias = callGetMeidaApi(next_url=next_url)
        saveMedias(user, medias)
示例#4
0
def saveMedias(user, medias):
    '''
    Store Instagram medias in the shared ``m`` table and follow the pagination.

    Created by bigzhu at 15/09/04 20:58:54.

    ``medias`` is an API response dict; this function reads its ``data``
    list and its ``pagination`` entry.  Of each media it reads ``id``,
    ``created_time``, ``images['standard_resolution']``, ``comments``,
    ``caption``, ``link`` and ``type``.

    user:   object providing ``id`` (stored as ``m_user_id``) and ``username``.
    medias: response dict of the current page.

    A media for which ``pg.insertIfNotExist`` returns None (treated here as
    "already stored") is only reported on stdout; paging continues.
    Returns None once a response carries an empty ``pagination`` value.
    '''
    # Loop over the pages instead of recursing once per page, so that an
    # account with many pages cannot hit the interpreter's recursion limit.
    while True:
        for media_d in medias['data']:
            media = storage(media_d)

            m = public_bz.storage()
            m.id_str = media.id
            m.m_type = 'instagram'
            m.m_user_id = user.id
            m.created_at = time_bz.timestampToDateTime(media.created_time)
            m.extended_entities = json.dumps(media.images['standard_resolution'])
            m.content = json.dumps(media.comments, cls=public_bz.ExtEncoder)
            m.text = media.caption['text'] if media.caption else None
            m.href = media.link
            m.type = media.type

            # Rows of several sources share table ``m``, so the duplicate
            # check is scoped by m_type as well as id_str.
            # NOTE(review): the condition is built by string interpolation;
            # use a parameterised form if the pg helper offers one.
            new_id = pg.insertIfNotExist(pg, 'm', m, "id_str='%s' and m_type='instagram'" % m.id_str)
            if new_id is None:
                # Duplicates do seem to occur, so report them without raising.
                error = '重复记录 id=%s, name=%s' % (media.id, user.username)
                print error
            else:
                print 'new=', media.id, user.username

        if not medias['pagination']:
            return
        medias = callGetMeidaApi(next_url=medias['pagination']['next_url'])
示例#5
0
def saveTwitter(tweet):
    '''
    Save a tweet: upsert its user into ``twitter_user`` and insert the tweet
    itself into the shared ``m`` table.

    Created by bigzhu at 15/07/10 14:39:48.

    The passed ``tweet`` object is modified in place (attributes are
    deleted, JSON-encoded or replaced by id strings).  A ``retweeted_status``
    is saved recursively and then replaced by its ``id_str``.

    Returns whatever ``pg.insertIfNotExist`` returns for the ``m`` row.
    '''
    owner = tweet.user
    del owner._json
    owner.entities = json.dumps(owner.entities)
    del owner.id
    pg.insertOrUpdate(pg, 'twitter_user', vars(owner), "id_str='%s'" % owner.id_str)

    tweet.t_user_id = owner.id_str
    screen_name = owner.screen_name
    del tweet.user

    if hasattr(tweet, 'author'):
        writer = tweet.author
        pg.insertOrUpdate(pg, 'twitter_user', vars(writer), "id_str='%s'" % writer.id_str)
        tweet.t_author_id = writer.id_str
        del tweet.author

    # Attributes removed when present.  'id' is dropped because the numeric
    # twitter id (e.g. 618948810941673472) is too large and led to
    # psycopg2.DataError: integer out of range.
    for name in ('_api', '_json', 'id'):
        if hasattr(tweet, name):
            delattr(tweet, name)

    # Structured attributes are replaced by their JSON text when present.
    for name in ('entities', 'geo', 'coordinates', 'extended_entities', 'scopes'):
        if hasattr(tweet, name):
            setattr(tweet, name, json.dumps(getattr(tweet, name)))

    if hasattr(tweet, 'retweeted_status'):
        retweeted = tweet.retweeted_status
        saveTwitter(retweeted)
        tweet.retweeted_status = retweeted.id_str

    # quoted_status is not saved, only discarded.  place is an object
    # (Place(_api=<tweepy.api.API object ...>)) that the original author
    # did not know how to store, so it is discarded as well.
    for name in ('quoted_status', 'place'):
        if hasattr(tweet, name):
            delattr(tweet, name)

    record = public_bz.storage()
    record.id_str = tweet.id_str
    record.m_type = 'twitter'
    record.m_user_id = tweet.t_user_id
    record.created_at = tweet.created_at
    if hasattr(tweet, 'extended_entities'):
        record.extended_entities = tweet.extended_entities
    record.content = None
    record.text = tweet.text
    record.href = 'https://twitter.com/' + screen_name + '/status/' + tweet.id_str

    condition = "id_str='%s' and m_type='twitter'" % tweet.id_str
    return pg.insertIfNotExist(pg, 'm', record, condition)
示例#6
0
def saveTwitter(tweet):
    '''
    Persist one tweet.

    Created by bigzhu at 15/07/10 14:39:48.

    Steps, in order:
      1. upsert ``tweet.user`` (and ``tweet.author`` when present) into
         ``twitter_user``, keyed by ``id_str``;
      2. strip or JSON-encode a fixed set of attributes on ``tweet`` itself
         (the object is modified in place);
      3. save a ``retweeted_status`` recursively and keep only its ``id_str``;
      4. insert a row for the tweet into the shared ``m`` table.

    Returns the result of ``pg.insertIfNotExist`` for the ``m`` row.
    '''
    drop_if_present = ('_api', '_json', 'id')
    encode_if_present = ('entities', 'geo', 'coordinates',
                         'extended_entities', 'scopes')

    account = tweet.user
    del account._json
    account.entities = json.dumps(account.entities)
    del account.id
    pg.insertOrUpdate(pg, 'twitter_user', vars(account),
                      "id_str='%s'" % account.id_str)

    tweet.t_user_id = account.id_str
    screen_name = account.screen_name
    del tweet.user

    if hasattr(tweet, 'author'):
        author = tweet.author
        pg.insertOrUpdate(pg, 'twitter_user', vars(author),
                          "id_str='%s'" % author.id_str)
        tweet.t_author_id = author.id_str
        del tweet.author

    # The numeric twitter id is too large ("id": 618948810941673472) and
    # caused psycopg2.DataError: integer out of range, hence 'id' is dropped.
    for attr in drop_if_present:
        if hasattr(tweet, attr):
            delattr(tweet, attr)

    for attr in encode_if_present:
        if hasattr(tweet, attr):
            setattr(tweet, attr, json.dumps(getattr(tweet, attr)))

    if hasattr(tweet, 'retweeted_status'):
        saveTwitter(tweet.retweeted_status)
        tweet.retweeted_status = tweet.retweeted_status.id_str

    # A quoted status is thrown away rather than saved.
    if hasattr(tweet, 'quoted_status'):
        del tweet.quoted_status

    # place is an object the original author did not know how to handle:
    # Place(_api=<tweepy.api.API object at 0x1808050>
    if hasattr(tweet, 'place'):
        del tweet.place

    row = public_bz.storage()
    row.id_str = tweet.id_str
    row.m_type = 'twitter'
    row.m_user_id = tweet.t_user_id
    row.created_at = tweet.created_at
    if hasattr(tweet, 'extended_entities'):
        row.extended_entities = tweet.extended_entities
    row.content = None
    row.text = tweet.text
    row.href = 'https://twitter.com/' + screen_name + '/status/' + tweet.id_str

    return pg.insertIfNotExist(
        pg, 'm', row, "id_str='%s' and m_type='twitter'" % tweet.id_str)