class DiffBotTest(unittest.TestCase):
    """Integration tests for the DiffBot client.

    Every test builds a fresh client in ``setUp`` and calls the client's
    ``follow_add`` before running, so the tests depend on whatever backs
    the ``DiffBot`` class.
    """

    def setUp(self):
        """Create the client and register the test URL to obtain its id."""
        self.diffbot = DiffBot()  # dev_key has to come from the environment (DIFFBOT_KEY)
        self.test_url = 'http://nomulous.com/'
        self.test_url_id = self.diffbot.follow_add(self.test_url)['id']

    def test_http_handler_instance(self):
        """The client's http_handler() returns an HttpHandler."""
        self.assertIsInstance(self.diffbot.http_handler(), HttpHandler)

    def test_cache_handler_instance(self):
        """The HTTP handler's cache_handler() returns a CacheHandler."""
        self.assertIsInstance(self.diffbot.http_handler().cache_handler(), CacheHandler)

    def test_article_API(self):
        """article() returns a non-empty dict with the expected keys."""
        article_info = self.diffbot.article(self.test_url)
        self.assertIsInstance(article_info, dict)
        self.assertNotEqual(len(article_info), 0)
        # assertIn replaces dict.has_key(), which no longer exists on
        # Python 3, and names the missing key when the check fails.
        for key in ['url', 'text', 'xpath', 'tags', 'raw_response', 'title']:
            self.assertIn(key, article_info)

    def test_follow_add_API(self):
        """follow_add() returns a non-empty dict with the expected keys."""
        follow_add_info = self.diffbot.follow_add(self.test_url)
        self.assertIsInstance(follow_add_info, dict)
        self.assertNotEqual(len(follow_add_info), 0)
        for key in ['id', 'pubDate', 'title']:
            self.assertIn(key, follow_add_info)

    def test_follow_read_API(self):
        """follow_read() returns a non-empty dict with the expected keys."""
        follow_read_info = self.diffbot.follow_read(self.test_url_id)
        self.assertIsInstance(follow_read_info, dict)
        self.assertNotEqual(len(follow_read_info), 0)
        for key in ['info', 'items']:
            self.assertIn(key, follow_read_info)
#!flask/bin/python
from app import app, models, db
from diffbot import DiffBot
from datetime import datetime, time

# Initiate API and get pizzas
api = DiffBot(app.config['DIFFBOT_KEY'])
cb = api.article('http://cheeseboardcollective.coop/pizza', summary=True)

# Split the text field containing the pizzas into a list, one entry per line
text = cb['text'].split('\n')

# Keep only entries 1 through 10 and drop everything around them.
# NOTE(review): the code below treats these as alternating
# date string / description entries -- confirm against the page layout.
schedule = text[1:11]


def replaceTime(t, year=2013):
    """Replace every even-indexed entry of *t* with a datetime, in place.

    Each even-indexed entry must be a string in ``"%A %m/%d"`` form
    (weekday name, month/day).  The string carries no year, so the parsed
    value is moved to *year*.

    Args:
        t: Mutable list whose even-indexed items are date strings.
        year: Year assigned to every parsed date (defaults to 2013, the
            value that used to be hard-coded).

    Returns:
        The same list object *t*, after modification.

    Raises:
        ValueError: If an even-indexed entry does not match the format.
    """
    # Stride of 2 visits indices 0, 2, 4, ... -- exactly the even ones.
    for idx in range(0, len(t), 2):
        t[idx] = datetime.strptime(t[idx], "%A %m/%d").replace(year=year)
    return t


# converts regular list into ordered pairs
def toOrderedPairs(t, year=2013):
    """Turn a flat ``[date, value, date, value, ...]`` list into pairs.

    The date strings are first converted with :func:`replaceTime`, which
    modifies *t* in place.

    Args:
        t: Flat list alternating date strings and their associated values.
        year: Year passed through to :func:`replaceTime`.

    Returns:
        The result of zipping the even-indexed items with the odd-indexed
        items; a trailing item without a partner is dropped.
    """
    t = replaceTime(t, year)
    # t[::2] takes items 0, 2, 4, ... and t[1::2] takes items 1, 3, 5, ...;
    # zip pairs them up positionally.
    return zip(t[::2], t[1::2])
import getpass

email = raw_input("Enter gmail id: ")
# NOTE(review): the password prompt and the API-key assignment were fused
# by a redaction artifact in the reviewed copy; both statements are
# restored from the names used further down -- confirm against the original.
# getpass reads the password without echoing it to the terminal.
password = getpass.getpass("Enter Password: ")
# NOTE(review): API key is hard-coded in source; consider loading it from
# configuration or the environment instead.
diffbot_api_key = "c539badebc2f19f621f616c52a0399d0"

#From http://yuji.wordpress.com/2011/06/22/python-imaplib-imap-example-with-gmail/
mail = imaplib.IMAP4_SSL('imap.gmail.com')
mail.login(email, password)
mail.list()  # Out: list of "folders" aka labels in gmail.
mail.select("inbox")  # connect to inbox.

result, data = mail.uid('search', None, "ALL")  # search and return uids instead
uids = data[0].split()
if not uids:
    # Without this guard an empty search result fails with a bare
    # IndexError on the [-1] lookup below.
    raise SystemExit("Inbox is empty; there is no latest email to process.")
latest_email_uid = uids[-1]

result, data = mail.uid('fetch', latest_email_uid, '(RFC822)')
raw_email = data[0][1]
#print raw_email
#TODO: Process email

# urlpattern is defined outside this fragment; each match is indexed with
# [0] below -- presumably the pattern has capture groups, verify.
urls_in_email = urlpattern.findall(raw_email)

api = DiffBot(diffbot_api_key)
extracted_content = [api.article(urlitem[0], summary=True)
                     for urlitem in urls_in_email]
# Parenthesised single-argument form prints the same on Python 2 and 3.
print(extracted_content)
import lxml
from time import sleep
from sababa.models import Article

# feedzilla categories
categories = {'Art': 13, 'Sports': 27, 'Business': 22, 'Technology': 30,
              'Politics': 3, 'Health': 11, 'Entertainment': 6, 'Science': 8}

api = DiffBot(DIFFBOT_TOKEN)
# api.article('http://google.com', summary=True)


def open_landing(url, counter=1):
    """Fetch *url* and parse it with BeautifulSoup, retrying on failure.

    Args:
        url: Address of the page to download.
        counter: Number of the current attempt.  Attempts stop once this
            reaches 3, so the default allows three tries in total.

    Returns:
        The parsed ``BeautifulSoup`` document, or ``None`` when every
        attempt failed.
    """
    while True:
        try:
            return BeautifulSoup(urllib2.urlopen(url).read(), 'lxml')
        except Exception:
            # ``Exception`` rather than a bare ``except`` so that
            # KeyboardInterrupt / SystemExit still propagate.
            print(counter)
            # ``>=`` also stops when called with a counter already past 3.
            if counter >= 3:
                return None
            # Loop instead of recursing: the recursive call's result used
            # to be discarded, so a successful retry still returned None.
            counter += 1


def fetch_news(code):
    # Requests the articles.json endpoint of feedzilla category *code*
    # with count=100.
    response = requests.get('http://api.feedzilla.com/v1/categories/{}/articles.json?count=100'.format(code))
def setUp(self):
    """Build the DiffBot client and look up the id of the test URL.

    Stores the client on ``self.diffbot``, the URL on ``self.test_url`` and
    the ``'id'`` entry of the ``follow_add`` response on
    ``self.test_url_id``.
    """
    # dev_key has to come from the environment (DIFFBOT_KEY)
    client = DiffBot()
    self.diffbot = client

    url = 'http://nomulous.com/'
    self.test_url = url

    follow_info = client.follow_add(url)
    self.test_url_id = follow_info['id']