Пример #1
0
def updateFeed(request):
    """Refresh stored feed images for every subscription of the requesting user.

    For each ``UsersSubscriptions`` row of ``request.user`` the RSS feed at
    the subscription's ``Address`` is parsed.  An entry is stored as a new
    ``SubscriptionItem`` only when all of the following hold:

    * its textual representation contains a single-quoted ``http://`` URL
      ending in ``.jpg``/``.jpeg``;
    * that URL is not already stored for the subscription;
    * the entry was not published before the day the user subscribed;
    * ``get_image_size`` reports at least 100000 for the image.

    The cheap checks run first so that ``get_image_size`` is only called for
    entries that could actually be saved.

    Args:
        request: Incoming request; only ``request.user`` is read.

    Returns:
        An ``HttpResponse`` with status 200.  Failures on individual feed
        entries are logged (or silently skipped for missing attributes) and
        never change the response.  Errors raised outside the per-entry
        ``try`` block, such as by the ORM lookups, are not caught here.
    """
    current_user = request.user
    # Matches a single-quoted http:// JPEG URL inside str(feed entry).
    pattern = re.compile( r'\'(http://[^\'\"]+\.jpe?g)\'' )
    min_image_size = 100000

    subscriptions = UsersSubscriptions.objects.filter( User=current_user )

    for user_subscription in subscriptions:
        sub_id = user_subscription.Subscription_id
        stored_items = SubscriptionItem.objects.filter( Subscription_id=sub_id )
        sub = Subscription.objects.get( id=sub_id )
        # Set membership replaces the linear scan over a list of URLs.
        known_urls = { item.ImageUrl for item in stored_items }

        feed = feedparser.parse( sub.Address )
        for rss_item in feed.entries:
            try:
                # Text rss_item representation for regexp search
                match = pattern.search( str( rss_item ) )
                if match is None:
                    # Entry carries no usable image link.
                    continue
                img_url = match.group( 1 )
                # Image already exists
                if img_url in known_urls:
                    continue
                # Image older than the subscription.
                published_date = parser.parse( rss_item.published )
                if user_subscription.AddedDate.date() > published_date.date():
                    continue
                # Image is very small (checked last: it needs a remote lookup).
                if int( get_image_size( img_url ) ) < min_image_size:
                    continue

                feed_item = SubscriptionItem( )
                feed_item.Subscription = sub
                feed_item.AddedDate = published_date
                feed_item.ImageUrl = img_url
                feed_item.Url = rss_item.link
                feed_item.Description = rss_item['description']
                feed_item.save( )
                known_urls.add( img_url )
            except AttributeError:
                # The entry lacks an attribute read above (e.g. ``published``
                # or ``link``); such entries are skipped silently, as before.
                continue
            except Exception:
                # logger.exception already attaches the active traceback; the
                # message only needs to say what was being processed.
                logger.exception(
                    "Failed to process a feed entry for subscription %s", sub_id )
                continue
        sub.LastUpdateDate = timezone.now()
        sub.save( )
    return HttpResponse(status=200)
Пример #2
0
def run_task():
    """Poll the feed of every user subscription and store newly found images.

    For each ``UsersSubscriptions`` row the RSS feed at the subscription's
    ``Address`` is parsed.  An entry is stored as a new ``SubscriptionItem``
    when its textual representation contains a single-quoted ``http://`` URL
    ending in ``.jpg``/``.jpeg``, that URL is not yet known for the same
    subscription, and a ``HEAD`` request reports a ``content-length`` of at
    least 100000.

    Known image URLs are tracked per subscription id for the whole run.  An
    image seen in one feed therefore no longer suppresses the same image in
    a different feed, while several user rows that point at the same feed
    still share one de-duplication set.

    Returns:
        None.  Failures on individual feed entries are logged (or silently
        skipped for missing attributes).  Errors raised outside the
        per-entry ``try`` block, such as by the ORM lookups, are not caught
        here.
    """
    # Matches a single-quoted http:// JPEG URL inside str(feed entry).
    pattern = re.compile( r'\'(http://[^\'\"]+\.jpe?g)\'' )
    min_image_bytes = 100000
    # Without a timeout a single unresponsive image host blocks the whole task.
    head_timeout_seconds = 10
    # subscription id -> set of image URLs already stored or seen this run.
    known_urls_by_sub = {}

    subscriptions = UsersSubscriptions.objects.all()

    for user_subscription in subscriptions:
        added_date = user_subscription.AddedDate
        sub_id = user_subscription.Subscription_id
        result_query = (Q( Subscription_id=sub_id ) & Q( AddedDate__gte=added_date ))
        stored_items = SubscriptionItem.objects.filter( result_query )
        sub = Subscription.objects.get( id=sub_id )
        known_urls = known_urls_by_sub.setdefault( sub_id, set() )
        known_urls.update( item.ImageUrl for item in stored_items )

        feed = feedparser.parse( sub.Address )
        for rss_item in feed.entries:
            try:
                # Text rss_item representation for regexp search
                match = pattern.search( str( rss_item ) )
                if match is None:
                    # Entry carries no usable image link.
                    continue
                img_url = match.group( 1 )
                # Image already exists (checked before the network request).
                if img_url in known_urls:
                    continue
                # Image is very small
                response = requests.head( img_url, timeout=head_timeout_seconds )
                if int( response.headers['content-length'] ) < min_image_bytes:
                    continue
                # NOTE(review): entries published before the subscription was
                # added are not filtered out; a date check existed here but
                # was disabled in the original code.
                feed_item = SubscriptionItem( )
                feed_item.Subscription = sub
                feed_item.AddedDate = parser.parse( rss_item.published )
                feed_item.ImageUrl = img_url
                feed_item.Url = rss_item.link
                feed_item.Description = rss_item['description']
                feed_item.save( )
                known_urls.add( img_url )
            except AttributeError:
                # The entry lacks an attribute read above (e.g. ``published``
                # or ``link``); such entries are skipped silently, as before.
                continue
            except Exception:
                # logger.exception already attaches the active traceback; the
                # message only needs to say what was being processed.
                logger.exception(
                    "Failed to process a feed entry for subscription %s", sub_id )
                continue
        # NOTE(review): this stores a naive local datetime; if the project
        # uses timezone-aware datetimes, confirm whether an aware "now"
        # should be used instead.
        sub.LastUpdateDate = datetime.datetime.now( )
        sub.save( )