def search_npr(url_num):
    """Query the NPR story API for *url_num* and return the result as Items.

    The request asks for a single JSON result (``numResults=1``).  One
    ``Item`` is built per story in the response, with the story title as
    ``name``, the text of its first link as ``url`` and the tag ``"world"``.

    Args:
        url_num: id appended to the query string (converted with ``str``).

    Returns:
        A list of ``Item`` objects; empty when the response lists no story.

    Raises:
        KeyError, IndexError: if the response does not have the expected
            ``list -> story -> title / link`` layout.
    """
    # NOTE(review): the API key is committed in source; consider moving it
    # into configuration.
    key = 'MDEyNTYyODg1MDEzODM5ODAxNTIzYjc2ZQ001'
    url = ('http://api.npr.org/query?apiKey=' + key +
           '&numResults=1&format=json&id=' + str(url_num))

    # Open the URL and decode the JSON, always releasing the connection.
    response = urlopen(url)
    try:
        json_obj = load(response)
    finally:
        response.close()

    items = []
    for story in json_obj['list']['story']:
        # A fresh Item per story, so several stories never share one object.
        item = Item()
        link = story['link'][0]['$text']
        item.name = story['title']['$text']
        item.url = link
        print(link)
        item.add_tag("world")
        items.append(item)

    if items:
        # NOTE(review): the duration is derived from the raw API response
        # text (the query URL is fetched a second time), not from the page
        # behind `link` -- confirm this is intended.
        duration = durationfunctionstupid(requests.get(url).text)
        for item in items:
            item.duration = duration

    return items
def search_youtube(query, api_key=None):
    """Search YouTube for *query* and return the hits as ``Item`` objects.

    The search itself goes through the GData video feed; the duration of
    each hit is then looked up with one request per video against the
    YouTube Data API v3, which requires an API key.

    Args:
        query: search text, sent as the ``q`` parameter.
        api_key: key for the v3 API.  When omitted, the ``YOUTUBE_API_KEY``
            environment variable is used.

    Returns:
        A list of ``Item`` objects with ``name``, ``duration``, ``url`` and
        one tag each; empty when the feed contains no entries.

    Raises:
        RuntimeError: if no API key is available.
        KeyError, IndexError: if a response lacks the expected fields.
    """
    import os

    if api_key is None:
        api_key = os.environ.get("YOUTUBE_API_KEY")
    if not api_key:
        raise RuntimeError(
            "search_youtube needs an API key: pass api_key or set "
            "YOUTUBE_API_KEY")

    search_url = "https://gdata.youtube.com/feeds/api/videos?alt=json"
    api_url_base = ("https://www.googleapis.com/youtube/v3/videos"
                    "?part=contentDetails%2Cstatistics&key=" + api_key)
    # Feed ids are full URLs; the video id is whatever follows this prefix.
    id_start = len('http://gdata.youtube.com/feeds/api/videos/')

    feed = requests.get(search_url, params={"q": query}).json()
    # Treat a feed without "entry" as "no results" -- presumably the key is
    # omitted when nothing matches; verify against a live response.
    videos = feed["feed"].get("entry", [])

    items = []
    for video in videos:
        title = video["title"]["$t"]
        tag = video["category"][1]["term"]
        vid_id = video["id"]["$t"][id_start:]

        # The search feed is not used for the length; ask the v3 API.
        details = requests.get(api_url_base, params={"id": vid_id}).json()
        # The API reports the duration as a string such as "PT5M29S".
        raw_duration = details["items"][0]["contentDetails"]["duration"]

        item = Item()
        item.duration = fmt_num(get_number(raw_duration))
        item.name = title
        item.add_tag(tag)
        # Watch links carry the video id in the "v" query parameter.
        item.url = "https://www.youtube.com/watch?v=" + vid_id
        items.append(item)
    return items
def nytimes_urls(category):
    """Collect an ``Item`` for every result whose section equals *category*.

    Reads ``response["results"]``; ``response`` is not defined inside this
    function and must be provided by an enclosing scope.  For each matching
    result the linked page is downloaded and its text handed to
    ``durationfunctionstupid`` to fill in the duration.

    Args:
        category: section name compared against each result's ``"section"``;
            also used as the tag of every returned item.

    Returns:
        A list of ``Item`` objects, one per matching result, in feed order.
    """
    matches = []
    for entry in response["results"]:
        # Guard clause: ignore results from other sections.
        if entry["section"] != category:
            continue

        story = Item()
        story.name = entry["title"].encode('utf-8')

        # Encode the link once; it is both fetched and stored.
        link = entry["url"].encode('utf-8')
        page = requests.get(link).text
        story.url = link
        story.duration = durationfunctionstupid(page)

        story.add_tag(category)
        matches.append(story)
    return matches
def search_youtube(query):
    """Search YouTube for *query* and return the hits as ``Item`` objects.

    The GData video feed supplies title, category and id of each hit; the
    duration is fetched with one extra request per video from the YouTube
    Data API v3.

    Args:
        query: search text, sent as the ``q`` parameter.

    Returns:
        A list of ``Item`` objects.  ``duration`` holds the string exactly
        as the API returns it (e.g. ``"PT5M29S"``); it is not parsed here.
        The list is empty when the feed contains no entries.

    Raises:
        KeyError, IndexError: if a response lacks the expected fields.
    """
    search_url = "https://gdata.youtube.com/feeds/api/videos?alt=json"
    # NOTE(review): the API key is committed in source; consider moving it
    # into configuration.
    api_url_base = "https://www.googleapis.com/youtube/v3/videos?part=contentDetails%2Cstatistics&key=AIzaSyCQ5Zw_FlMQgu_tGN559nqpEZ4Dx0PPPXQ"
    # Feed ids are full URLs; the video id is whatever follows this prefix.
    id_start = len('http://gdata.youtube.com/feeds/api/videos/')

    feed = requests.get(search_url, params={"q": query}).json()
    # Treat a feed without "entry" as "no results" -- presumably the key is
    # omitted when nothing matches; verify against a live response.
    videos = feed["feed"].get("entry", [])

    items = []
    for video in videos:
        title = video["title"]["$t"]
        tag = video["category"][1]["term"]
        vid_id = video["id"]["$t"][id_start:]

        # The search feed is not used for the length; ask the v3 API.
        details = requests.get(api_url_base, params={"id": vid_id}).json()
        duration = details["items"][0]["contentDetails"]["duration"]

        item = Item()
        item.duration = duration
        item.name = title
        item.add_tag(tag)
        # Watch links carry the video id in the "v" query parameter.
        item.url = "https://www.youtube.com/watch?v=" + vid_id
        items.append(item)
    return items
def parse_times_and_urls(url_times):
    """Turn ``(url, markup)`` pairs into ``Item`` objects tagged "game".

    For every pair, the first ``dd:dd`` time that directly follows ``">``
    in the markup becomes the item's ``duration`` and the first element
    becomes its ``url``.

    Args:
        url_times: sequence of indexable pairs; element 0 is stored as the
            URL, element 1 is the text searched for the time.

    Returns:
        A list of ``Item`` objects, one per input pair, in input order.

    Raises:
        IndexError: if a pair's markup contains no matching time.
    """
    # Raw string keeps the backslashes for the regex engine.
    time_pattern = re.compile(r'">(?P<time>\d\d\:\d\d)')

    # With a single group, findall() yields plain strings, so [0] already is
    # the complete "dd:dd" text of the first match.
    times = [time_pattern.findall(pair[1])[0] for pair in url_times]
    urls = [pair[0] for pair in url_times]

    items = []
    for url, duration in zip(urls, times):
        item = Item()
        item.duration = duration
        item.url = url
        # NOTE(review): `html` is not defined in this function; it has to
        # come from an enclosing scope.  Presumably the page behind `url`
        # was meant -- confirm before relying on `name`.
        soup = BeautifulSoup(html)
        # find_all() returns every tag whose name starts with "h2"; the
        # whole result set is stored, not a single heading's text.
        item.name = soup.find_all(re.compile("^h2"))
        item.add_tag("game")
        items.append(item)
    return items
def search_youtube(query):
    """Return ``Item`` objects for the YouTube videos matching *query*.

    Titles, categories and ids come from the GData video search feed.  Each
    video's duration is looked up separately through the YouTube Data API
    v3, so one additional HTTP request is made per hit.

    Args:
        query: search text, sent as the ``q`` parameter.

    Returns:
        A list of ``Item`` objects whose ``duration`` is the unparsed string
        returned by the API (e.g. ``"PT5M29S"``).  Empty when the feed
        contains no entries.

    Raises:
        KeyError, IndexError: if a response lacks the expected fields.
    """
    # NOTE(review): the API key is committed in source; consider moving it
    # into configuration.
    details_url = "https://www.googleapis.com/youtube/v3/videos?part=contentDetails%2Cstatistics&key=AIzaSyCQ5Zw_FlMQgu_tGN559nqpEZ4Dx0PPPXQ"
    # The feed identifies a video by URL; strip this prefix to get the id.
    prefix_len = len('http://gdata.youtube.com/feeds/api/videos/')

    search = requests.get(
        "https://gdata.youtube.com/feeds/api/videos?alt=json",
        params={"q": query})
    # Treat a feed without "entry" as "no results" -- presumably the key is
    # omitted when nothing matches; verify against a live response.
    videos = search.json()["feed"].get("entry", [])

    items = []
    for video in videos:
        title = video["title"]["$t"]
        tag = video["category"][1]["term"]
        vid_id = video["id"]["$t"][prefix_len:]

        # A second API has to be asked for the length of the video.
        details = requests.get(details_url, params={"id": vid_id}).json()

        item = Item()
        item.duration = details["items"][0]["contentDetails"]["duration"]
        item.name = title
        item.add_tag(tag)
        # The video id belongs in the "v" query parameter of a watch link.
        item.url = "https://www.youtube.com/watch?v=" + vid_id
        items.append(item)
    return items
def get_reddit_top(keyword, number):
    """Return Items for the leading posts of the subreddit *keyword*.

    Args:
        keyword: subreddit name, inserted into the listing URL.
        number: how many posts to look at; passed through
            ``n_sanity_check`` before use.

    Returns:
        A list of ``Item`` objects built from the first ``number`` posts of
        the listing, leaving out posts flagged ``over_18``.  The list is
        shorter than ``number`` when posts are skipped or the listing
        itself is shorter.  ``duration`` is always the empty string.

    Raises:
        KeyError: if the listing lacks the expected ``data``/``children``
            structure or a post lacks a required field.
    """
    number = n_sanity_check(number)
    payload = {'limit': number + 1}
    response = reddit_request(
        'http://www.reddit.com/r/' + keyword + '.json', payload)

    items = []
    # Slicing (rather than indexing 0..number-1) tolerates short listings;
    # max() keeps a negative count from selecting posts.
    for child in response["data"]["children"][:max(number, 0)]:
        post = child["data"]
        if post["over_18"]:
            # Leave out adult content but keep going through the listing.
            continue

        item = Item()
        item.duration = ""
        item.name = post["title"]
        item.add_tag(post["subreddit"])
        item.url = post["url"]
        items.append(item)
    return items
def get_reddit_top(keyword, number):
    """Fetch a subreddit listing and wrap its first posts in ``Item`` objects.

    Args:
        keyword: subreddit name, inserted into the listing URL.
        number: requested post count; normalised by ``n_sanity_check``.

    Returns:
        A list of ``Item`` objects for the first ``number`` posts of the
        listing, excluding posts flagged ``over_18``.  Each item carries the
        post title, URL and subreddit tag, with an empty-string
        ``duration``.  Fewer items are returned when posts are excluded or
        the listing is short.

    Raises:
        KeyError: if the listing lacks the expected ``data``/``children``
            structure or a post lacks a required field.
    """
    number = n_sanity_check(number)
    listing_url = 'http://www.reddit.com/r/' + keyword + '.json'
    listing = reddit_request(listing_url, {'limit': number + 1})

    # Take at most `number` posts; a slice never runs past the end of the
    # listing, and max() stops a negative count from selecting anything.
    candidates = listing["data"]["children"][:max(number, 0)]

    items = []
    for child in candidates:
        post = child["data"]
        if post["over_18"]:
            # Adult posts are dropped individually; later posts still count.
            continue

        item = Item()
        item.duration = ""
        item.name = post["title"]
        item.add_tag(post["subreddit"])
        item.url = post["url"]
        items.append(item)
    return items