def updateFeed(request):
    """Refresh the image feeds of the requesting user's subscriptions.

    For every ``UsersSubscriptions`` row that belongs to ``request.user`` the
    subscription's address is parsed with ``feedparser`` and a
    ``SubscriptionItem`` is saved for each feed entry that

    * contains a single-quoted ``http://...jpg`` / ``.jpeg`` URL in its
      string representation,
    * was published on or after the day the user subscribed,
    * is not already stored for that subscription, and
    * is at least 100000 in size as reported by ``get_image_size``
      (presumably bytes -- TODO confirm against that helper).

    Entries that cannot be processed are skipped; unexpected errors are
    logged and never abort the refresh. After a subscription's feed has been
    walked its ``LastUpdateDate`` is set to the current time.

    Args:
        request: The incoming request; only ``request.user`` is read.

    Returns:
        An ``HttpResponse`` with status 200.
    """
    min_image_size = 100000
    pattern = re.compile(r'\'(http://[^\'\"]+\.jpe?g)\'')

    for _subscription in UsersSubscriptions.objects.filter(User=request.user):
        sub_id = _subscription.Subscription_id
        sub = Subscription.objects.get(id=sub_id)
        subscribed_on = _subscription.AddedDate.date()

        # Only the URLs are needed for duplicate detection, so fetch just
        # that column and keep it in a set for O(1) membership tests.
        known_urls = set(
            SubscriptionItem.objects
            .filter(Subscription_id=sub_id)
            .values_list('ImageUrl', flat=True)
        )

        feed = feedparser.parse(sub.Address)
        for rss_item in feed.entries:
            try:
                published_date = parser.parse(rss_item.published)

                # The image URL is located by a regexp search over the text
                # representation of the whole entry.
                match = pattern.search(str(rss_item))
                if match is None:
                    continue
                img_url = match.group(1)

                # Cheap local checks first, so the image size lookup is
                # only done for entries that could actually be stored.

                # Image is older than the subscription.
                if subscribed_on > published_date.date():
                    continue
                # Image already exists.
                if img_url in known_urls:
                    continue
                # Image is very small.
                if int(get_image_size(img_url)) < min_image_size:
                    continue

                feedItem = SubscriptionItem()
                feedItem.Subscription = sub
                feedItem.AddedDate = published_date
                feedItem.ImageUrl = img_url
                feedItem.Url = rss_item.link
                feedItem.Description = rss_item['description']
                feedItem.save()
                known_urls.add(img_url)
            except AttributeError:
                # Best effort: an entry lacking an accessed attribute
                # (e.g. ``published`` or ``link``) is silently skipped.
                continue
            except Exception:
                # logger.exception already attaches the active traceback.
                logger.exception(
                    "Failed to process an entry of feed %s", sub.Address
                )
                continue

        sub.LastUpdateDate = timezone.now()
        sub.save()

    return HttpResponse(status=200)
def run_task():
    """Poll the feeds of all user subscriptions and store new image entries.

    For every ``UsersSubscriptions`` row the subscription's address is parsed
    with ``feedparser`` and a ``SubscriptionItem`` is saved for each feed
    entry that

    * contains a single-quoted ``http://...jpg`` / ``.jpeg`` URL in its
      string representation,
    * has an image URL not seen so far during this run, and
    * reports a ``content-length`` of at least 100000 on a HEAD request.

    Unlike ``updateFeed``, no publish-date filter is applied to the entries.
    Entries that cannot be processed are skipped; unexpected errors
    (including network timeouts) are logged and never abort the run. After a
    subscription's feed has been walked its ``LastUpdateDate`` is set to the
    current time.

    Returns:
        None.
    """
    min_image_size = 100000
    request_timeout = 10  # seconds; keeps one stalled host from hanging the run
    pattern = re.compile(r'\'(http://[^\'\"]+\.jpe?g)\'')

    # NOTE(review): the set of known URLs is shared by ALL subscriptions
    # handled in this run (it is never reset per subscription), so an image
    # stored for one subscription is not stored again for another. This
    # matches the previous behaviour -- confirm it is intended.
    known_urls = set()

    for _subscription in UsersSubscriptions.objects.all():
        sub_id = _subscription.Subscription_id
        stored_since_subscribing = SubscriptionItem.objects.filter(
            Q(Subscription_id=sub_id) & Q(AddedDate__gte=_subscription.AddedDate)
        )
        sub = Subscription.objects.get(id=sub_id)

        # Only the URLs are needed for duplicate detection.
        known_urls.update(
            stored_since_subscribing.values_list('ImageUrl', flat=True)
        )

        feed = feedparser.parse(sub.Address)
        for rss_item in feed.entries:
            try:
                # The image URL is located by a regexp search over the text
                # representation of the whole entry.
                match = pattern.search(str(rss_item))
                if match is None:
                    continue
                img_url = match.group(1)

                # Image already exists -- checked before the HEAD request so
                # known images cost no network round trip.
                if img_url in known_urls:
                    continue

                # Image is very small.
                response = requests.head(img_url, timeout=request_timeout)
                if int(response.headers['content-length']) < min_image_size:
                    continue

                feedItem = SubscriptionItem()
                feedItem.Subscription = sub
                feedItem.AddedDate = parser.parse(rss_item.published)
                feedItem.ImageUrl = img_url
                feedItem.Url = rss_item.link
                feedItem.Description = rss_item['description']
                feedItem.save()
                known_urls.add(img_url)
            except AttributeError:
                # Best effort: an entry lacking an accessed attribute
                # (e.g. ``published`` or ``link``) is silently skipped.
                continue
            except Exception:
                # logger.exception already attaches the active traceback.
                logger.exception(
                    "Failed to process an entry of feed %s", sub.Address
                )
                continue

        # Same clock source as updateFeed: timezone.now() rather than a
        # naive datetime.datetime.now().
        sub.LastUpdateDate = timezone.now()
        sub.save()