Пример #1
0
def search_npr(url_num):
    """Fetch one NPR story by id and return it wrapped in a list of Item.

    Queries the NPR story API for ``url_num`` (one result, JSON format),
    copies the story's title and first link onto a new ``Item``, tags it
    ``"world"`` and sets its duration from ``durationfunctionstupid``.

    Args:
        url_num: NPR story id; converted with ``str()`` and appended to the
            query URL.

    Returns:
        A list holding one populated ``Item``, or an empty list when the
        response contains no stories.
    """
    # NOTE(review): the API key is hard-coded in source; consider moving it
    # to configuration.
    url = 'http://api.npr.org/query?apiKey='
    key = 'MDEyNTYyODg1MDEzODM5ODAxNTIzYjc2ZQ001'
    url = url + key
    url += '&numResults=1&format=json&id='
    url += str(url_num)

    # Open the query URL and decode the JSON payload.
    response = urlopen(url)
    json_obj = load(response)

    items = []

    # Covers both a missing and an empty 'story' entry.
    stories = json_obj['list'].get('story')
    if not stories:
        return items

    # The query asks for a single result; should several come back, the
    # item is populated from the last one.
    story = stories[-1]
    title = story['title']['$text']
    link = story['link'][0]['$text']

    item = Item()
    item.name = title
    item.url = link
    print(link)
    item.add_tag("world")

    # NOTE(review): the duration is computed from the response text of the
    # API query URL, not from the story page at ``link`` -- confirm that
    # this is intended.
    item.duration = durationfunctionstupid(requests.get(url).text)
    items.append(item)
    return items
Пример #2
0
def nytimes_urls(category):
    """Build an Item for every entry of ``response["results"]`` in a section.

    ``response`` is not defined in this function; it is read from the
    enclosing scope. For each entry whose ``"section"`` equals ``category``
    the entry's page is downloaded and an ``Item`` is filled with the
    UTF-8 encoded title and URL, a duration computed by
    ``durationfunctionstupid`` from the page text, and ``category`` as tag.

    Args:
        category: Section name compared against each entry's ``"section"``.

    Returns:
        A list of the ``Item`` objects created, in result order.
    """
    matches = []
    for entry in response["results"]:
        # Skip entries that belong to other sections.
        if entry["section"] != category:
            continue

        story = Item()
        story.name = entry["title"].encode('utf-8')

        link = entry["url"].encode('utf-8')
        page_text = requests.get(link).text

        story.url = link
        story.duration = durationfunctionstupid(page_text)
        story.add_tag(category)
        matches.append(story)
    return matches
Пример #3
0
def get_yahoo_top(keyword, number):
    """Return items for up to ``number`` entries of a Yahoo News RSS feed.

    Sends a YQL query selecting the RSS feed at
    ``rss.news.yahoo.com/rss/<keyword>``, then for each of the first
    ``number`` entries downloads the linked page and builds an item via
    ``create_item(duration, name, tag, url)`` with ``keyword`` as the tag.

    Args:
        keyword: Feed name inserted into the query URL; also used as the
            tag. It is inserted as-is, without URL quoting.
        number: Requested number of entries; passed through
            ``n_sanity_check`` before use.

    Returns:
        A list of the created items. It is shorter than ``number`` when the
        feed has fewer entries, and empty when the query has no results.
    """
    url1 = "http://query.yahooapis.com/v1/public/yql?q=select%20*%20from%20rss%20where%20url%3D%22http%3A%2F%2Frss.news.yahoo.com%2Frss%2F"
    url2 = "%22&format=json&callback="
    number = n_sanity_check(number)
    response = requests.get(url1 + keyword + url2)
    data = response.json()

    # presumably an empty result set is reported as a null "results" --
    # verify against the service.
    results = data["query"]["results"]
    if not results:
        return []

    entries = results["item"]
    # NOTE(review): a lone entry may arrive as a single object rather than
    # a list -- confirm; it is wrapped so it can be handled the same way.
    if isinstance(entries, dict):
        entries = [entries]

    items = []
    # Slicing stops at the end of the feed instead of raising IndexError;
    # max() keeps a negative count from slicing from the tail.
    for item_response in entries[:max(number, 0)]:
        url = item_response["link"]
        name = item_response["title"]
        duration = durationfunctionstupid(requests.get(url).text)
        tag = keyword
        items.append(create_item(duration, name, tag, url))
    return items