def pull_emails_from_url(url=TEST_URL, timeout=30):
    """Fetch ``url`` and store every e-mail address found in its body.

    The response body is scanned with the module-level ``EMAILS`` pattern
    and each match is passed to ``Email.objects.get_or_create`` together
    with ``url`` as ``reference_url``. One line is printed per match saying
    whether the row was newly created or already existed.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout ``requests`` waits indefinitely, so one unresponsive
            host would stall the caller's whole loop.

    Raises:
        requests.exceptions.RequestException: if the download fails or
            times out (propagated unchanged from ``requests.get``).
    """
    logging.info(" --- pull_emails_from_url")
    response = requests.get(url, timeout=timeout)
    for email in EMAILS.findall(response.content):
        try:
            record, created = Email.objects.get_or_create(
                email=email, reference_url=url)
        except IntegrityError:
            # Keep the best-effort behaviour (skip this address and carry
            # on) but leave a trace instead of swallowing it silently.
            logging.warning(
                "IntegrityError storing email %r from %s; skipping",
                email, url)
            continue
        # Single-argument parenthesised print emits the same text on
        # Python 2 and Python 3.
        if created:
            print("NEW: {}".format(record))
        else:
            print("EXISTING: {}".format(record))
def run(self, **kwargs):
    """Search Twitter for 'dumpmon' and harvest e-mails from linked pages.

    Builds a Twython client from the module-level ``params``, runs a
    search for ``'dumpmon'``, and for each returned status extracts URLs
    from its text with the ``URLS`` pattern. Entries of the first match
    that contain ``TWITTER_SHORTENED_URL`` are handed to
    ``pull_emails_from_url``. ``kwargs`` is accepted but not used.
    """
    banner = " --- pull_emails"
    logging.info(banner)
    print(banner)

    client = twython.Twython(**params)
    statuses = client.search(q='dumpmon', count=1000)['statuses']

    for status in statuses:
        matches = URLS.findall(status['text'])
        if not matches:
            continue
        # NOTE(review): only the first match is inspected, and it is
        # iterated element by element. That is meaningful if URLS has
        # several capture groups (findall then yields tuples); if it yields
        # plain strings this walks single characters -- confirm against
        # the URLS definition.
        first_match = matches[0]
        shortened = [part for part in first_match
                     if TWITTER_SHORTENED_URL in part]
        for link in shortened:
            pull_emails_from_url(link)