Exemplo n.º 1
0
def search_npr(url_num):
    """Query the NPR story API for one story id and wrap it in an Item.

    Args:
        url_num: NPR story id; it is converted with ``str()`` and appended
            to the query URL.

    Returns:
        A list holding a single ``Item`` whose name, url, "world" tag and
        duration are filled in, or an empty list when the response
        contains no stories.
    """
    # NOTE(review): the API key is hard-coded in source; consider moving it
    # to configuration.
    key = 'MDEyNTYyODg1MDEzODM5ODAxNTIzYjc2ZQ001'
    url = ('http://api.npr.org/query?apiKey=' + key
           + '&numResults=1&format=json&id=' + str(url_num))

    # Open the URL and decode the JSON payload, always releasing the handle.
    response = urlopen(url)
    try:
        json_obj = load(response)
    finally:
        response.close()

    stories = json_obj['list'].get('story', [])
    if not stories:
        # Nothing to build an item from.
        return []

    # The query asks for numResults=1; if more than one story ever comes
    # back, the last one is used.
    story = stories[-1]
    title = story['title']['$text']
    link = story['link'][0]['$text']

    item = Item()
    item.name = title
    item.url = link
    print(link)
    item.add_tag("world")

    # NOTE(review): the duration is computed from a second fetch of the API
    # URL itself, not from the story link -- confirm that this is intended.
    item.duration = durationfunctionstupid(requests.get(url).text)
    return [item]
Exemplo n.º 2
0
def search_youtube(query, key=None):
    """Search YouTube for ``query`` and return one Item per video found.

    Each video from the gdata search feed is looked up again in the
    YouTube Data API v3 to obtain its duration.

    Args:
        query: Search terms, sent as the ``q`` parameter of the feed request.
        key: YouTube Data API key. When omitted, the ``YOUTUBE_API_KEY``
            environment variable is used.

    Returns:
        A list of ``Item`` objects with name, url, one tag and duration set.
        A feed without an "entry" key yields an empty list.

    Raises:
        ValueError: If no API key is supplied and none is found in the
            environment.
    """
    import os

    if key is None:
        key = os.environ.get("YOUTUBE_API_KEY")
    if not key:
        raise ValueError(
            "A YouTube API key is required: pass key=... or set YOUTUBE_API_KEY")

    search_url = "https://gdata.youtube.com/feeds/api/videos?alt=json"
    details_url = ("https://www.googleapis.com/youtube/v3/videos"
                   "?part=contentDetails%2Cstatistics")
    # Feed ids look like <prefix><video id>; the prefix length is used to
    # slice the bare id out.
    id_start = len('http://gdata.youtube.com/feeds/api/videos/')

    feed = requests.get(search_url, params={"q": query}).json()
    videos = feed["feed"].get("entry", [])

    items = []
    for video in videos:
        title = video["title"]["$t"]
        # The tag is taken from the second category entry of the feed item.
        tag = video["category"][1]["term"]
        vid_id = video["id"]["$t"][id_start:]

        # A second API call is needed to learn the video length. The key is
        # passed through params so it gets URL-encoded.
        details = requests.get(
            details_url, params={"key": key, "id": vid_id}).json()
        # The API reports the duration as a string such as "PT5M29S".
        raw_duration = details["items"][0]["contentDetails"]["duration"]
        duration = fmt_num(get_number(raw_duration))

        item = Item()
        item.duration = duration
        item.name = title
        item.add_tag(tag)
        # Watch URLs carry the video id in the "v" query parameter.
        item.url = "https://www.youtube.com/watch?v=" + vid_id
        items.append(item)
    return items
Exemplo n.º 3
0
def nytimes_urls(category):
    """Collect an Item for every result whose section equals ``category``.

    Results are read from ``response["results"]``; ``response`` is not
    defined in this function and must be provided by the enclosing scope.
    Each matching article's page is downloaded so that its duration can be
    computed from the HTML.

    Args:
        category: Section name to match; also used as the tag of each Item.

    Returns:
        A list of ``Item`` objects, one per matching result.
    """
    articles = []
    for result in response["results"]:
        # Guard clause: ignore results from other sections.
        if result["section"] != category:
            continue

        entry = Item()
        entry.name = result["title"].encode('utf-8')
        page_html = requests.get(result["url"].encode('utf-8')).text
        entry.url = result["url"].encode('utf-8')
        entry.duration = durationfunctionstupid(page_html)
        entry.add_tag(category)
        articles.append(entry)
    return articles
Exemplo n.º 4
0
def search_youtube(query):
    """Search YouTube for ``query`` and return one Item per video found.

    Each video from the gdata search feed is looked up again in the
    YouTube Data API v3 to obtain its duration.

    Args:
        query: Search terms, sent as the ``q`` parameter of the feed request.

    Returns:
        A list of ``Item`` objects with name, url, one tag and duration set.
        The duration is stored exactly as the API returns it (a string such
        as "PT5M29S"). A feed without an "entry" key yields an empty list.
    """
    search_url = "https://gdata.youtube.com/feeds/api/videos?alt=json"
    # NOTE(review): the API key is hard-coded in this URL; consider moving
    # it to configuration.
    api_url_base = "https://www.googleapis.com/youtube/v3/videos?part=contentDetails%2Cstatistics&key=AIzaSyCQ5Zw_FlMQgu_tGN559nqpEZ4Dx0PPPXQ"
    # Feed ids look like <prefix><video id>; the prefix length is used to
    # slice the bare id out.
    id_start = len('http://gdata.youtube.com/feeds/api/videos/')

    feed = requests.get(search_url, params={"q": query}).json()
    videos = feed["feed"].get("entry", [])

    items = []
    for video in videos:
        title = video["title"]["$t"]
        # The tag is taken from the second category entry of the feed item.
        tag = video["category"][1]["term"]
        vid_id = video["id"]["$t"][id_start:]

        # A second API call is needed to learn the video length.
        details = requests.get(api_url_base, params={"id": vid_id}).json()
        duration = details["items"][0]["contentDetails"]["duration"]

        item = Item()
        item.duration = duration
        item.name = title
        item.add_tag(tag)
        # Watch URLs carry the video id in the "v" query parameter.
        item.url = "https://www.youtube.com/watch?v=" + vid_id
        items.append(item)
    return items
Exemplo n.º 5
0
def parse_times_and_urls(url_times):
    """Build one "game"-tagged Item per (url, markup) entry.

    Args:
        url_times: Sequence of indexable entries where element 0 is the URL
            and element 1 is text searched for a ``">MM:SS`` time stamp.

    Returns:
        A list of ``Item`` objects. Each duration is the full "MM:SS" text
        of the first match, or an empty string when the markup has no
        time stamp.
    """
    time_pattern = re.compile(r'">(?P<time>\d\d\:\d\d)')

    items = []
    for entry in url_times:
        url, markup = entry[0], entry[1]
        match = time_pattern.search(markup)

        item = Item()
        # Keep the whole "MM:SS" string, not just its first character.
        item.duration = match.group("time") if match else ""
        item.url = url
        # NOTE(review): `html` is not defined in this function; presumably
        # it is a module-level variable holding the page markup -- confirm.
        soup = BeautifulSoup(html)
        # find_all() yields every tag whose name starts with "h2"; the whole
        # result is stored as the item name.
        item.name = soup.find_all(re.compile("^h2"))
        item.add_tag("game")
        items.append(item)
    return items
Exemplo n.º 6
0
def search_youtube(query):
    """Return an Item for every video the YouTube search feed gives for ``query``.

    The gdata feed supplies title, category and id; a follow-up request to
    the YouTube Data API v3 supplies the duration.

    Args:
        query: Search terms, sent as the ``q`` parameter of the feed request.

    Returns:
        A list of ``Item`` objects with name, url, one tag and duration set.
        The duration is the unparsed API string (for example "PT5M29S").
        A feed without an "entry" key yields an empty list.
    """
    search_url = "https://gdata.youtube.com/feeds/api/videos?alt=json"
    # NOTE(review): the API key is hard-coded in this URL; consider moving
    # it to configuration.
    api_url_base = "https://www.googleapis.com/youtube/v3/videos?part=contentDetails%2Cstatistics&key=AIzaSyCQ5Zw_FlMQgu_tGN559nqpEZ4Dx0PPPXQ"
    # Length of the prefix that precedes the bare video id in feed ids.
    id_start = len('http://gdata.youtube.com/feeds/api/videos/')

    search_result = requests.get(search_url, params={"q": query}).json()
    videos = search_result["feed"].get("entry", [])

    items = []
    for video in videos:
        title = video["title"]["$t"]
        # The tag is taken from the second category entry of the feed item.
        tag = video["category"][1]["term"]
        vid_id = video["id"]["$t"][id_start:]

        # The search feed does not carry the length, so ask the v3 API.
        details = requests.get(api_url_base, params={"id": vid_id}).json()
        duration = details["items"][0]["contentDetails"]["duration"]

        item = Item()
        item.duration = duration
        item.name = title
        item.add_tag(tag)
        # Watch URLs carry the video id in the "v" query parameter.
        item.url = "https://www.youtube.com/watch?v=" + vid_id
        items.append(item)
    return items
Exemplo n.º 7
0
def get_reddit_top(keyword, number):
    """Return Items for the first posts of the subreddit named ``keyword``.

    Args:
        keyword: Subreddit name, inserted into the listing URL.
        number: Requested number of posts; passed through
            ``n_sanity_check`` before use.

    Returns:
        A list of ``Item`` objects (name, url, subreddit tag, empty
        duration) built from at most ``number`` leading posts, with posts
        flagged ``over_18`` left out.
    """
    number = n_sanity_check(number)
    payload = {'limit': number + 1}
    response = reddit_request('http://www.reddit.com/r/' + keyword + '.json',
                              payload)

    items = []
    # Slicing instead of indexing by position avoids an IndexError when the
    # listing holds fewer posts than requested.
    for child in response["data"]["children"][:number]:
        post = child["data"]
        if post["over_18"]:
            # Skip only the NSFW post; later safe posts are still returned.
            continue
        item = Item()
        item.duration = ""
        item.name = post["title"]
        item.add_tag(post["subreddit"])
        item.url = post["url"]
        items.append(item)
    return items
Exemplo n.º 8
0
def get_reddit_top(keyword, number):
    """Fetch a subreddit listing and convert its leading posts to Items.

    Args:
        keyword: Subreddit name, inserted into the listing URL.
        number: Requested number of posts; passed through
            ``n_sanity_check`` before use.

    Returns:
        A list of ``Item`` objects (name, url, subreddit tag, empty
        duration) built from at most ``number`` leading posts, with posts
        flagged ``over_18`` left out.
    """
    number = n_sanity_check(number)
    payload = {'limit': number + 1}
    listing = reddit_request('http://www.reddit.com/r/' + keyword + '.json', payload)

    items = []
    # Slice rather than index so a short listing cannot raise IndexError.
    for child in listing["data"]["children"][:number]:
        post = child["data"]
        if post["over_18"]:
            # Drop the NSFW post but keep processing the rest.
            continue
        item = Item()
        item.duration = ""
        item.name = post["title"]
        item.add_tag(post["subreddit"])
        item.url = post["url"]
        items.append(item)
    return items