def fetch_itunes_lookup(podcast_id):
    """Look up a podcast on iTunes by name and store the raw lookup dict.

    The lookup is only saved when the match is unambiguous: either a single
    result, an exact (case-insensitive) name match, or a result whose feed
    URL equals the podcast's URL. Everything else is just logged.
    """
    podcast = Podcast.objects.get(id=podcast_id)
    print("Fetching itunes lookup: {!r}".format(podcast.name))
    results = itunes_search(podcast.name)
    if not results:
        print("Nothing returned on itunes lookup: {!r}".format(podcast.name))
        return

    def store(match):
        # Persist the chosen iTunes result on the podcast row.
        podcast.itunes_lookup = match
        podcast.save()

    candidates = results["results"]
    count = results["resultCount"]
    if count == 1:
        store(candidates[0])
    elif count > 1:
        # Pick the first one if it's a slam dunk
        first = candidates[0]
        if podcast.name.lower() == first["collectionName"].lower():
            store(first)
        elif podcast.url in [x.get("feedUrl") for x in candidates]:
            store([x for x in candidates if x.get("feedUrl") == podcast.url][0])
        else:
            print("Too ambiguous ({!r} != {!r}, {!r} != {!r})".format(
                podcast.name,
                first["collectionName"],
                podcast.url,
                first.get("feedUrl"),
            ))
    else:
        print("Found no results")
def fetch_itunes_lookup(podcast_id):
    """Fetch and persist the iTunes lookup payload for one podcast.

    Only an unambiguous match is saved: a lone result, an exact
    case-insensitive name match, or a feed-URL match. Ambiguous or empty
    searches are logged and left alone.
    """
    podcast = Podcast.objects.get(id=podcast_id)
    print("Fetching itunes lookup: {!r}".format(podcast.name))
    results = itunes_search(podcast.name)
    if not results:
        print("Nothing returned on itunes lookup: {!r}".format(podcast.name))
        return
    hits = results["results"]
    total = results["resultCount"]
    if total == 1:
        podcast.itunes_lookup = hits[0]
        podcast.save()
    elif total > 1:
        # Pick the first one if it's a slam dunk
        lookup = hits[0]
        feed_urls = [x.get("feedUrl") for x in hits]
        if podcast.name.lower() == lookup["collectionName"].lower():
            podcast.itunes_lookup = lookup
            podcast.save()
        elif podcast.url in feed_urls:
            # index() picks the first feed-URL match, same as filtering
            # the list and taking element zero.
            podcast.itunes_lookup = hits[feed_urls.index(podcast.url)]
            podcast.save()
        else:
            print(
                "Too ambiguous ({!r} != {!r}, {!r} != {!r})".format(
                    podcast.name,
                    lookup["collectionName"],
                    podcast.url,
                    lookup.get("feedUrl"),
                )
            )
    else:
        print("Found no results")
def search_by_itunes(q):
    """Search iTunes titles for *q* and create Podcast rows for new results.

    At most the first 10 results are considered; newly created podcasts get
    their artwork downloaded and an episode-download task submitted.
    """
    print("ITUNES SEARCHING {!r}".format(q))
    try:
        hits = itunes_search(q, attribute="titleTerm", timeout=6)["results"]
    except (ReadTimeout, ConnectTimeout):
        # Treat a slow/unreachable iTunes API as an empty result set.
        hits = []
    print("FOUND {}".format(len(hits)))
    created = 0
    for hit in hits[:10]:
        feed_url = hit.get("feedUrl")
        if not feed_url:
            # Without a feed URL there is nothing to subscribe to.
            continue
        name = hit["collectionName"]
        try:
            Podcast.objects.get(url=feed_url, name=name)
        except Podcast.DoesNotExist:
            assert name, hit
            podcast = Podcast.objects.create(
                name=name,
                url=feed_url,
                itunes_lookup=hit,
                image_url=hit["artworkUrl600"],
            )
            try:
                podcast.download_image(timeout=3)
            except (ReadTimeout, ConnectTimeout):
                # Artwork fetch timed out; retry it out of band.
                redownload_podcast_image(podcast.id)
            download_episodes_task(podcast.id)
            created += 1
    print("Found {} new podcasts by iTunes search".format(created))
def search_by_itunes(q):
    """Run an iTunes title search for *q*, creating podcasts we do not have.

    Considers at most the first 10 usable results; network timeouts are
    treated as "no results".
    """
    print("ITUNES SEARCHING {!r}".format(q))
    try:
        found = itunes_search(q, attribute="titleTerm", timeout=6)["results"]
    except (ReadTimeout, ConnectTimeout):
        found = []  # iTunes unavailable: behave as if nothing matched

    def ensure_podcast(entry):
        # Create the Podcast if unseen; True only when a new row was made.
        try:
            Podcast.objects.get(
                url=entry["feedUrl"], name=entry["collectionName"]
            )
            return False
        except Podcast.DoesNotExist:
            assert entry["collectionName"], entry
            podcast = Podcast.objects.create(
                name=entry["collectionName"],
                url=entry["feedUrl"],
                itunes_lookup=entry,
                image_url=entry["artworkUrl600"],
            )
            try:
                podcast.download_image(timeout=3)
            except (ReadTimeout, ConnectTimeout):
                # Retry the artwork download asynchronously on timeout.
                redownload_podcast_image(podcast.id)
            download_episodes_task(podcast.id)
            return True

    print("FOUND {}".format(len(found)))
    count_new = sum(
        ensure_podcast(entry)
        for entry in found[:10]
        if entry.get("feedUrl")  # skip results with no feed to subscribe to
    )
    print("Found {} new podcasts by iTunes search".format(count_new))
def fetch_itunes_lookup(podcast_id):
    """Fetch the iTunes lookup payload for one podcast.

    The raw lookup dict is stored on the Podcast only when the search
    returns exactly one result; anything else is just logged.
    """
    podcast = Podcast.objects.get(id=podcast_id)
    # Fixed: these were Python 2 print statements, a SyntaxError under
    # Python 3 and inconsistent with the rest of this file.
    print("Fetching itunes lookup", repr(podcast.name))
    results = itunes_search(podcast.name)
    if not results:
        # itunes_search can come back empty/None on failure (the other
        # fetch_itunes_lookup variants in this file guard for this too);
        # bail out instead of crashing on the subscript below.
        print("Nothing returned on itunes lookup", repr(podcast.name))
        return
    if results['resultCount'] == 1:
        lookup = results['results'][0]
        podcast.itunes_lookup = lookup
        podcast.save()
    else:
        print("Found", results['resultCount'], 'results')
        print(results['resultCount'])
def add(request):
    """Add-a-podcast view.

    ``?id=...``     — redirect to the index with that podcast selected,
                      downloading its image/episodes on demand first.
    ``?search=...`` — search iTunes by title, create Podcast rows for any
                      unseen results, and render the matches.
    """
    context = {}
    context['page_title'] = 'Add Podcast'
    # Renamed from `id` to avoid shadowing the builtin.
    podcast_id = request.GET.get('id', '').strip()
    if podcast_id:
        podcast = get_object_or_404(Podcast, id=podcast_id)
        if not podcast.image and podcast.image_url:
            podcast.download_image()
        if not Episode.objects.filter(podcast=podcast).exists():
            download_episodes(podcast)
        url = reverse('podcasttime:index') + '#ids={}'.format(podcast.id)
        return redirect(url)
    search = request.GET.get('search', '').strip()
    context['search'] = search
    if search:
        podcasts = []
        matches = itunes_search(search, attribute='titleTerm')
        for result in matches['results']:
            # Fixed: some iTunes results have no 'feedUrl'; without one we
            # can neither look up nor create a Podcast, so skip them (the
            # other iTunes consumers in this file do the same) instead of
            # raising KeyError below.
            if not result.get('feedUrl'):
                continue
            pod = {
                'image_url': result['artworkUrl600'],
                'itunes_url': result['collectionViewUrl'],
                'artist_name': result['artistName'],
                'tags': result['genres'],
                'name': result['collectionName'],
            }
            try:
                podcast = Podcast.objects.get(
                    url=result['feedUrl'],
                    name=result['collectionName']
                )
            except Podcast.DoesNotExist:
                podcast = Podcast.objects.create(
                    name=result['collectionName'],
                    url=result['feedUrl'],
                    itunes_lookup=result,
                    image_url=result['artworkUrl600'],
                )
                # episodes will be created and downloaded by the cron job
                redownload_podcast_image.delay(podcast.id)
            pod['id'] = podcast.id
            pod['url'] = reverse(
                'podcasttime:podcast_slug',
                args=(podcast.id, podcast.get_or_create_slug())
            )
            podcasts.append(pod)
        context['found'] = matches['resultCount']
        context['podcasts'] = podcasts
    return render(request, 'podcasttime/add.html', context)
def search_by_itunes(q):
    """Search iTunes titles for *q* and create Podcast rows for new results.

    One specific podcast is blocklisted because of a DMCA takedown notice
    (see link in the loop). At most the first 10 results are considered.
    """
    print("ITUNES SEARCHING {!r}".format(q))
    try:
        results = itunes_search(q, attribute="titleTerm", timeout=6)["results"]
    except (ReadTimeout, ConnectTimeout):
        # A slow/unreachable iTunes API is treated as an empty result set.
        results = []
    print("FOUND {}".format(len(results)))
    count_new = 0
    for result in results[:10]:
        if not result.get("feedUrl"):
            # Without a feed URL there is nothing to subscribe to.
            continue
        if (
            result["collectionName"]
            == "dj-andy-bee Deep n Soulful House # Urban Soul Podcast"
            or result["feedUrl"] == "http://dj-andy-b.podOmatic.com/rss2.xml"
        ):
            # Blocklisted per DMCA takedown notice:
            # https://www.lumendatabase.org/notices/18580243#
            # Fixed typo in the log message ("COLLETION" -> "COLLECTION").
            print("REFUSE COLLECTION")
            continue
        try:
            podcast = Podcast.objects.get(
                url=result["feedUrl"], name=result["collectionName"]
            )
        except Podcast.DoesNotExist:
            assert result["collectionName"], result
            podcast = Podcast.objects.create(
                name=result["collectionName"],
                url=result["feedUrl"],
                itunes_lookup=result,
                image_url=result["artworkUrl600"],
            )
            try:
                podcast.download_image(timeout=3)
            except (ReadTimeout, ConnectTimeout):
                # Retry the artwork download asynchronously on timeout.
                redownload_podcast_image(podcast.id)
            download_episodes_task(podcast.id)
            count_new += 1
    print("Found {} new podcasts by iTunes search".format(count_new))
def find(request):
    """JSON search endpoint for podcasts.

    Three modes, selected by query string:
      ?ids=1,2,3            — fetch specific podcasts (Elasticsearch first,
                              ORM fallback when the index is empty).
      ?q=...&submitted=...  — the user pressed enter: search iTunes and
                              create any unseen podcasts.
      ?q=...                — incremental search against Elasticsearch;
                              falls back to an iTunes search when too few
                              hits are found.
    Returns {"items": [...], "total": N, "q": q}.

    NOTE(review): this source arrived whitespace-mangled; the statement
    nesting below is a faithful reconstruction — confirm against VCS.
    """
    if not (request.GET.get("ids") or request.GET.get("q")):
        return http.HttpResponseBadRequest("no ids or q")
    found = []
    max_ = 5  # cap on search results returned per mode
    q = None
    total = None
    # Podcasts whose latest episode is older than this are flagged outdated.
    cutoff = timezone.now() - datetime.timedelta(
        days=settings.LATEST_PODCAST_CUTOFF_DAYS
    )

    def package_podcast(podcast):
        # Normalize either a Podcast ORM instance or an Elasticsearch hit
        # dict into the dict shape the client expects.
        if type(podcast) is Podcast:
            return podcast.to_search_doc()
        else:
            # remove summary fields since it's rather large
            podcast.pop("summary", None)
            if "episodes_count" not in podcast:
                # better than undefined
                podcast["episodes_count"] = None
                podcast["total_hours"] = None
            if podcast.get("episodes_seconds"):
                podcast["total_hours"] = podcast.pop("episodes_seconds") / 3600
            try:
                if podcast["latest_episode"] < cutoff:
                    podcast["_outdated"] = True
                else:
                    podcast["_outdated"] = False
            except KeyError:
                # no latest_episode recorded at all: treat as outdated
                podcast["_outdated"] = True
            return podcast

    if request.GET.get("ids"):
        search = PodcastDoc.search()
        ids = [int(x) for x in request.GET["ids"].split(",")]
        search = search.filter("terms", id=ids)
        response = search.execute()
        if not response.hits.total:
            # Nothing in the search index — fall back to the ORM and poke
            # any stale podcasts into re-downloading their episodes.
            podcasts_orm = Podcast.objects.filter(id__in=ids)
            for podcast in podcasts_orm.filter(error__isnull=True):
                if not podcast.total_seconds or not podcast.last_fetch:
                    # cache key dedupes resubmissions for 60 seconds
                    cache_key = "resubmit:{}".format(podcast.id)
                    if not cache.get(cache_key):
                        print(
                            "Forcing {!r} (id={}) to download episodes".format(
                                podcast.name, podcast.id
                            )
                        )
                        download_episodes_task(podcast.id)
                        cache.set(cache_key, True, 60)
                else:
                    # save() presumably re-indexes the document — confirm
                    podcast.save()
        for hit in response.hits:
            podcast = package_podcast(hit.to_dict())
            if (
                podcast["episodes_count"] is None
                or not podcast.get("last_fetch")
                or podcast["last_fetch"] < (timezone.now() - datetime.timedelta(days=7))
            ):
                # Stale or never-fetched: resubmit the download task,
                # deduped through the cache for 60 seconds.
                cache_key = "resubmit:{}".format(podcast["id"])
                if not cache.get(cache_key):
                    print(
                        "Forcing {!r} (id={}) to download episodes".format(
                            podcast["name"], podcast["id"]
                        )
                    )
                    download_episodes_task(podcast["id"])
                    cache.set(cache_key, True, 60)
                podcast["_updating"] = True
            found.append(podcast)
        # rearrange them in the order they were
        found = sorted(found, key=lambda x: ids.index(x["id"]))
    elif request.GET.get("submitted"):
        q = request.GET["q"]
        try:
            results = itunes_search(q, attribute="titleTerm", timeout=6)["results"]
        except (ReadTimeout, ConnectTimeout):
            # iTunes unavailable: behave as if nothing matched
            results = []
        for result in results[:max_]:
            if not result.get("feedUrl"):
                # can't subscribe without a feed URL
                print("Weird result", result)
                continue
            try:
                podcast = Podcast.objects.get(
                    url=result["feedUrl"], name=result["collectionName"]
                )
            except Podcast.DoesNotExist:
                assert result["collectionName"], result
                podcast = Podcast.objects.create(
                    name=result["collectionName"],
                    url=result["feedUrl"],
                    itunes_lookup=result,
                    image_url=result["artworkUrl600"],
                )
                try:
                    podcast.download_image(timeout=3)
                except (ReadTimeout, ConnectTimeout):
                    # artwork fetch timed out; retry it out of band
                    redownload_podcast_image(podcast.id)
                download_episodes_task(podcast.id)
            # Reload since the task functions operate on a new instance
            # podcast = Podcast.objects.get(id=podcast.id)
            found.append(package_podcast(podcast))
    else:
        q = request.GET["q"]
        search = PodcastDoc.search()
        search = search.query("match_phrase", name=q)
        search = search[:max_]
        response = search.execute()
        total = response.hits.total
        for hit in response.hits:
            podcast = package_podcast(hit.to_dict())
            if podcast["episodes_count"] is None:
                # Never fetched: resubmit, deduped via cache for 60s.
                cache_key = "resubmit:{}".format(podcast["id"])
                if not cache.get(cache_key):
                    print(
                        "Forcing {!r} (id={}) to download episodes".format(
                            podcast["name"], podcast["id"]
                        )
                    )
                    download_episodes_task(podcast["id"])
                    cache.set(cache_key, True, 60)
                # this will force the client-side to re-query for this
                podcast["last_fetch"] = None
            found.append(podcast)
        if total < 5:
            # Thin local results: kick off an iTunes search to backfill.
            print("NOTHING FOUND!", q, "SENDING IT TO iTUNES")
            search_by_itunes(q)
    if total is None:
        total = len(found)
    return http.JsonResponse({"items": found, "total": total, "q": q})
def find(request):
    """JSON search endpoint for podcasts.

    Modes (by query string): ``?ids=...`` fetches specific podcasts
    (Elasticsearch first, ORM fallback); ``?q=...&submitted=...`` searches
    iTunes and creates unseen podcasts; plain ``?q=...`` does an
    incremental Elasticsearch search, backfilling from iTunes when fewer
    than 5 hits come back. Returns {"items", "total", "q"} as JSON.

    NOTE(review): this source arrived whitespace-mangled; the statement
    nesting below is a faithful reconstruction — confirm against VCS.
    """
    if not (request.GET.get("ids") or request.GET.get("q")):
        return http.HttpResponseBadRequest("no ids or q")
    found = []
    max_ = 5  # cap on search results per mode
    q = None
    total = None
    # Podcasts whose latest episode predates this are flagged outdated.
    cutoff = timezone.now() - datetime.timedelta(
        days=settings.LATEST_PODCAST_CUTOFF_DAYS
    )

    def package_podcast(podcast):
        # Normalize a Podcast instance or an Elasticsearch hit dict into
        # the client-facing dict shape.
        if type(podcast) is Podcast:
            return podcast.to_search_doc()
        else:
            # remove summary fields since it's rather large
            podcast.pop("summary", None)
            if "episodes_count" not in podcast:
                # better than undefined
                podcast["episodes_count"] = None
                podcast["total_hours"] = None
            if podcast.get("episodes_seconds"):
                podcast["total_hours"] = podcast.pop("episodes_seconds") / 3600
            try:
                if podcast["latest_episode"] < cutoff:
                    podcast["_outdated"] = True
                else:
                    podcast["_outdated"] = False
            except KeyError:
                # no latest_episode at all: treat as outdated
                podcast["_outdated"] = True
            return podcast

    if request.GET.get("ids"):
        search = PodcastDoc.search()
        ids = [int(x) for x in request.GET["ids"].split(",")]
        search = search.filter("terms", id=ids)
        response = search.execute()
        if not response.hits.total:
            # Index empty — ORM fallback; poke stale podcasts to refresh.
            podcasts_orm = Podcast.objects.filter(id__in=ids)
            for podcast in podcasts_orm.filter(error__isnull=True):
                if not podcast.total_seconds or not podcast.last_fetch:
                    # cache key dedupes resubmissions for 60 seconds
                    cache_key = "resubmit:{}".format(podcast.id)
                    if not cache.get(cache_key):
                        print(
                            "Forcing {!r} (id={}) to download episodes".format(
                                podcast.name, podcast.id
                            )
                        )
                        download_episodes_task(podcast.id)
                        cache.set(cache_key, True, 60)
                else:
                    # save() presumably re-indexes the document — confirm
                    podcast.save()
        for hit in response.hits:
            podcast = package_podcast(hit.to_dict())
            if (
                podcast["episodes_count"] is None
                or not podcast.get("last_fetch")
                or podcast["last_fetch"] < (timezone.now() - datetime.timedelta(days=7))
            ):
                # Stale or never fetched: resubmit the episode-download
                # task, deduped through the cache for 60 seconds.
                cache_key = "resubmit:{}".format(podcast["id"])
                if not cache.get(cache_key):
                    print(
                        "Forcing {!r} (id={}) to download episodes".format(
                            podcast["name"], podcast["id"]
                        )
                    )
                    download_episodes_task(podcast["id"])
                    cache.set(cache_key, True, 60)
                podcast["_updating"] = True
            found.append(podcast)
        # rearrange them in the order they were
        found = sorted(found, key=lambda x: ids.index(x["id"]))
    elif request.GET.get("submitted"):
        q = request.GET["q"]
        try:
            results = itunes_search(q, attribute="titleTerm", timeout=6)["results"]
        except (ReadTimeout, ConnectTimeout):
            # iTunes unavailable: behave as if nothing matched
            results = []
        for result in results[:max_]:
            if not result.get("feedUrl"):
                # can't subscribe without a feed URL
                print("Weird result", result)
                continue
            if (
                result["collectionName"]
                == "dj-andy-bee Deep n Soulful House # Urban Soul Podcast"
                or result["feedUrl"] == "http://dj-andy-b.podOmatic.com/rss2.xml"
            ):
                # Blocklisted because of a DMCA takedown notice:
                # https://www.lumendatabase.org/notices/18580243#
                print("REFUSE COLLETION")
                continue
            try:
                podcast = Podcast.objects.get(
                    url=result["feedUrl"], name=result["collectionName"]
                )
            except Podcast.DoesNotExist:
                assert result["collectionName"], result
                podcast = Podcast.objects.create(
                    name=result["collectionName"],
                    url=result["feedUrl"],
                    itunes_lookup=result,
                    image_url=result["artworkUrl600"],
                )
                try:
                    podcast.download_image(timeout=3)
                except (ReadTimeout, ConnectTimeout):
                    # artwork fetch timed out; retry it out of band
                    redownload_podcast_image(podcast.id)
                download_episodes_task(podcast.id)
            # Reload since the task functions operate on a new instance
            # podcast = Podcast.objects.get(id=podcast.id)
            found.append(package_podcast(podcast))
    else:
        q = request.GET["q"]
        search = PodcastDoc.search()
        search = search.query("match_phrase", name=q)
        search = search[:max_]
        response = search.execute()
        total = response.hits.total
        for hit in response.hits:
            podcast = package_podcast(hit.to_dict())
            if podcast["episodes_count"] is None:
                # Never fetched: resubmit, deduped via cache for 60s.
                cache_key = "resubmit:{}".format(podcast["id"])
                if not cache.get(cache_key):
                    print(
                        "Forcing {!r} (id={}) to download episodes".format(
                            podcast["name"], podcast["id"]
                        )
                    )
                    download_episodes_task(podcast["id"])
                    cache.set(cache_key, True, 60)
                # this will force the client-side to re-query for this
                podcast["last_fetch"] = None
            found.append(podcast)
        if total < 5:
            # Thin local results: kick off an iTunes backfill search.
            print("NOTHING FOUND!", q, "SENDING IT TO iTUNES")
            search_by_itunes(q)
    if total is None:
        total = len(found)
    return http.JsonResponse({"items": found, "total": total, "q": q})
def find(request):
    """JSON search endpoint for podcasts (pre-Elasticsearch version).

    Modes (by query string): ``?ids=...`` fetches specific podcasts in the
    given order; ``?itunes=...&q=...`` searches iTunes and creates unseen
    podcasts; plain ``?q=...`` searches the ORM by prefix, then Postgres
    full-text, then substring. Returns {"items", "q"} as JSON.

    Fixed: Python 2 print statements (a SyntaxError under Python 3,
    inconsistent with the rest of the file) converted to print() calls;
    removed a dead duplicate ``items = []`` and commented-out code.
    """
    if not (request.GET.get('ids') or request.GET.get('q')):
        return http.HttpResponseBadRequest('no ids or q')
    found = []
    max_ = 5  # cap on search results
    q = None
    if request.GET.get('ids'):
        ids = [int(x) for x in request.GET['ids'].split(',')]
        found = Podcast.objects.filter(id__in=ids)
        # rearrange them in the order they were
        found = sorted(found, key=lambda x: ids.index(x.id))
    elif request.GET.get('itunes'):
        q = request.GET['q']
        try:
            results = itunes_search(
                q,
                attribute='titleTerm',
                timeout=6,
            )['results']
        except (ReadTimeout, ConnectTimeout):
            # iTunes unavailable: behave as if nothing matched
            results = []
        for result in results:
            try:
                podcast = Podcast.objects.get(
                    url=result['feedUrl'],
                    name=result['collectionName']
                )
            except Podcast.DoesNotExist:
                podcast = Podcast.objects.create(
                    name=result['collectionName'],
                    url=result['feedUrl'],
                    itunes_lookup=result,
                    image_url=result['artworkUrl600'],
                )
                try:
                    podcast.download_image(timeout=3)
                except (ReadTimeout, ConnectTimeout):
                    # artwork fetch timed out; retry it out of band
                    redownload_podcast_image(podcast.id)
                download_episodes_task.delay(podcast.id)
            # Reload since the task functions operate on a new instance
            # podcast = Podcast.objects.get(id=podcast.id)
            found.append(podcast)
    else:
        q = request.GET['q']
        base_qs = Podcast.objects.filter(error__isnull=True)
        # 1) cheap prefix match first
        podcasts = base_qs.filter(name__istartswith=q)
        for podcast in podcasts[:max_]:
            found.append(podcast)
        if len(q) > 2:
            # 2) Postgres full-text search for longer queries; %s is bound
            # via params, so this is safely parameterized.
            sql = (
                "to_tsvector('english', name) @@ "
                "plainto_tsquery('english', %s)"
            )
            podcasts = base_qs.exclude(
                id__in=[x.id for x in found]
            ).extra(
                where=[sql], params=[q]
            )[:max_]
            for podcast in podcasts[:max_]:
                if len(found) >= max_:
                    break
                found.append(podcast)
        if len(q) > 1:
            # 3) last resort: substring match
            podcasts = base_qs.filter(name__icontains=q).exclude(
                id__in=[x.id for x in found]
            )
            for podcast in podcasts[:max_]:
                if len(found) >= max_:
                    break
                found.append(podcast)

    def episodes_meta(podcast):
        # Cached (24h) episode count + total hours; submits an episode
        # download task when no episodes exist yet.
        episodes_cache_key = 'episodes-meta%s' % podcast.id
        meta = cache.get(episodes_cache_key)
        if meta is None:
            episodes = Episode.objects.filter(podcast=podcast)
            episodes_count = episodes.count()
            total_hours = None
            if episodes_count:
                total_seconds = episodes.aggregate(
                    Sum('duration')
                )['duration__sum']
                if total_seconds:
                    total_hours = total_seconds / 3600.0
            else:
                download_episodes_task.delay(podcast.id)
            meta = {
                'count': episodes_count,
                'total_hours': total_hours,
            }
            if episodes_count:
                cache.set(episodes_cache_key, meta, 60 * 60 * 24)
        return meta

    items = []
    for podcast in found:
        if podcast.image and is_html_document(podcast.image.path):
            # Some feeds serve an HTML error page instead of artwork.
            print("Found a podcast.image that wasn't an image")
            podcast.image = None
            podcast.save()
        if podcast.image:
            if podcast.image.size < 1000:
                # Tiny files are almost certainly broken artwork.
                print("IMAGE LOOKS SUSPICIOUS")
                print(podcast.image_url)
                print(repr(podcast), podcast.id)
                print(podcast.url)
                print(repr(podcast.image.read()))
                podcast.download_image()
        thumb_url = None
        if podcast.image:
            try:
                thumb_url = thumbnail(
                    podcast.image,
                    '100x100',
                    quality=81,
                    upscale=False
                ).url
                thumb_url = make_absolute_url(thumb_url, request)
            except IOError:
                # Corrupt image: log, drop it, and re-download out of band.
                import sys
                print("BAD IMAGE!")
                print(sys.exc_info())
                print(repr(podcast.image))
                print(repr(podcast), podcast.url)
                print()
                podcast.image = None
                podcast.save()
                redownload_podcast_image.delay(podcast.id)
        else:
            redownload_podcast_image.delay(podcast.id)
        # Temporarily put here
        if podcast.itunes_lookup is None:
            fetch_itunes_lookup.delay(podcast.id)
        meta = episodes_meta(podcast)
        episodes_count = meta['count']
        total_hours = meta['total_hours']
        items.append({
            'id': podcast.id,
            'name': podcast.name,
            'image_url': thumb_url,
            'episodes': episodes_count,
            'hours': total_hours,
            'last_fetch': podcast.last_fetch,
            'slug': podcast.get_or_create_slug(),
            'url': reverse(
                'podcasttime:podcast_slug',
                args=(podcast.id, podcast.get_or_create_slug())
            ),
        })
    return http.JsonResponse({
        'items': items,
        'q': q,
    })