def get_local_tweets(source, limit):
    """Read and parse the tweets stored in the local file of *source*.

    The file is decoded as UTF-8 explicitly so that the result does not
    depend on the platform's default locale encoding.

    :param source: object whose ``file`` attribute is the path to read; it
        is also handed to ``parse_tweets`` together with the lines.
    :param limit: upper slice bound applied to the sorted result.
    :returns: the parsed tweets sorted in descending order and cut to
        *limit* entries, or an empty list when the file is missing or not
        readable (the error is logged at debug level).
    """
    try:
        with open(source.file, "r", encoding="utf-8") as fh:
            input_lines = fh.readlines()
    except (FileNotFoundError, PermissionError) as e:
        # A missing or unreadable local file is not fatal: report no tweets.
        logger.debug(e)
        return []

    local_tweets = parse_tweets(input_lines, source)
    return sorted(local_tweets, reverse=True)[:limit]
def retrieve_file(client, source, limit, cache):
    """Fetch the feed at ``source.url`` via *client*, using *cache* as fallback.

    This is a generator-based coroutine (it uses ``yield from``).

    :param client: object providing ``get(url, headers=...)``, which is
        awaited with ``yield from``.
    :param source: object with a ``url`` attribute; also passed to
        ``parse_tweets``.
    :param limit: upper slice bound applied to the returned tweets.
    :param cache: cache object or a falsy value when caching is disabled.
    :returns: a list of tweets sorted in descending order (status 200), the
        cached tweets (request failed or other status while cached), or an
        empty list (410 while cached, or no cached content to fall back on).
    """
    is_cached = cache.is_cached(source.url) if cache else None
    # Only send a conditional request when there is cached content to reuse.
    headers = {
        "If-Modified-Since": cache.last_modified(source.url)
    } if is_cached else {}

    try:
        response = yield from client.get(source.url, headers=headers)
        content = yield from response.text()
    except Exception as e:
        if is_cached:
            logger.debug("{0}: {1} - using cached content".format(
                source.url, e))
            return cache.get_tweets(source.url, limit)
        else:
            logger.debug("{0}: {1}".format(source.url, e))
            return []

    if response.status == 200:
        # splitlines() copes with "\r\n" line endings and does not produce a
        # trailing empty entry, unlike split("\n").
        tweets = parse_tweets(content.splitlines(), source)

        if cache:
            last_modified_header = response.headers.get("Last-Modified")
            if last_modified_header:
                logger.debug(
                    "{0} returned 200 and Last-Modified header - adding content to cache"
                    .format(source.url))
                cache.add_tweets(source.url, last_modified_header, tweets)
            else:
                # Without Last-Modified there is nothing to send back as
                # If-Modified-Since later, so the content is not cached.
                logger.debug(
                    "{0} returned 200 but no Last-Modified header - can’t cache content"
                    .format(source.url))
        else:
            logger.debug("{0} returned 200".format(source.url))

        return sorted(tweets, reverse=True)[:limit]

    elif response.status == 410 and is_cached:
        # 410 Gone:
        # The resource requested is no longer available,
        # and will not be available again.
        logger.debug("{0} returned 410 - deleting cached content".format(
            source.url))
        cache.remove_tweets(source.url)
        return []

    elif is_cached:
        logger.debug("{0} returned {1} - using cached content".format(
            source.url, response.status))
        return cache.get_tweets(source.url, limit)

    else:
        logger.debug("{0} returned {1}".format(source.url, response.status))
        return []
def test_parse_tweets():
    """Check that parse_tweets returns three tweets for the four lines below.

    The input mixes a line without a trailing newline, lines with one, and a
    line dated in the year 3000 (presumably the one that is dropped --
    confirm against parse_tweets).
    """
    lines = [
        "2016-02-08T00:00:00\tHallo",
        "2016-02-08T00:00:00\tBar\n",
        "2016-02-08T00:00:00\tFoo\n",
        "3000-02-08T00:00:00\tHallo\n",
    ]
    parsed = parse_tweets(lines, Source("foo", "bar"))
    assert len(parsed) == 3
def retrieve_file(source, limit, timeout):
    """Download the feed at ``source.url`` and return its parsed tweets.

    This is a generator-based coroutine (it uses ``yield from``).

    :param source: object with a ``url`` attribute; also passed to
        ``parse_tweets``.
    :param limit: upper slice bound applied to the sorted tweets.
    :param timeout: value handed to ``aiohttp.Timeout`` around the request.
    :returns: tweets sorted in descending order and cut to *limit* when the
        response status is 200; an empty list for any other status or when
        the request raised (the exception is logged at debug level).
    """
    try:
        with aiohttp.Timeout(timeout):
            response = yield from aiohttp.get(source.url)
            content = yield from response.text()
    except Exception as e:
        logger.debug(e)
        return []

    # Anything but a plain success yields no tweets.
    if response.status != 200:
        return []

    parsed = parse_tweets(content.splitlines(), source)
    ordered = sorted(parsed, reverse=True)
    return ordered[:limit]
def retrieve_file(client, source, limit, cache):
    """Fetch the feed at ``source.url`` via *client*, using *cache* as fallback.

    This is a generator-based coroutine (it uses ``yield from``).

    :param client: object providing ``get(url, headers=...)``.
    :param source: object with a ``url`` attribute; also passed to
        ``parse_tweets``.
    :param limit: upper slice bound applied to the returned tweets.
    :param cache: cache object or a falsy value when caching is disabled.
    :returns: freshly parsed tweets sorted in descending order on status
        200, the cached tweets when the request fails or returns another
        status while cached content exists, otherwise an empty list.
    """
    url = source.url
    is_cached = cache.is_cached(url) if cache else None

    # A conditional request only makes sense when cached content exists.
    headers = {}
    if is_cached:
        headers["If-Modified-Since"] = cache.last_modified(url)

    try:
        response = yield from client.get(url, headers=headers)
        content = yield from response.text()
    except Exception as e:
        if not is_cached:
            logger.debug(e)
            return []
        logger.debug("{}: {} - using cached content".format(url, e))
        return cache.get_tweets(url, limit)

    status = response.status

    if status != 200:
        if not is_cached:
            logger.debug("{} returned {}".format(url, status))
            return []
        if status == 410:
            # 410 Gone: the resource is no longer available and will not be
            # available again, so the cached copy is discarded.
            logger.debug("{} returned 410 - deleting cached content".format(url))
            cache.remove_tweets(url)
            return []
        logger.debug("{} returned {} - using cached content".format(url, status))
        return cache.get_tweets(url, limit)

    tweets = parse_tweets(content.splitlines(), source)

    if not cache:
        logger.debug("{} returned 200".format(url))
    else:
        last_modified_header = response.headers.get("Last-Modified")
        if last_modified_header:
            logger.debug("{} returned 200 and Last-Modified header - adding content to cache".format(url))
            cache.add_tweets(url, last_modified_header, tweets)
        else:
            logger.debug("{} returned 200 but no Last-Modified header - can’t cache content".format(url))

    return sorted(tweets, reverse=True)[:limit]
def retrieve_file(client, source, limit, cache):
    """Fetch the feed at ``source.url`` via *client*, using *cache* as fallback.

    This is a generator-based coroutine (it uses ``yield from``). Redirects
    are not followed automatically; a 301 response is handled explicitly by
    re-pointing the source at the new location.

    :param client: object providing ``request(method, url, headers=...,
        allow_redirects=...)``.
    :param source: object with ``url`` and ``nick`` attributes; also passed
        to ``parse_tweets``.
    :param limit: upper slice bound applied to the returned tweets.
    :param cache: cache object or a falsy value when caching is disabled.
    :returns: always a list: freshly parsed tweets sorted in descending
        order on status 200, cached tweets when falling back to the cache or
        after a 301 migration, otherwise an empty list.
    """
    is_cached = cache.is_cached(source.url) if cache else None
    # Only send a conditional request when there is cached content to reuse.
    headers = {"If-Modified-Since": cache.last_modified(source.url)} if is_cached else {}

    try:
        response = yield from client.request(
            "get", source.url, headers=headers, allow_redirects=False)
        content = yield from response.text()
    except Exception as e:
        if is_cached:
            logger.debug("{}: {} - using cached content".format(source.url, e))
            return cache.get_tweets(source.url, limit)

        # The exception type is checked with isinstance(): an exception
        # instance never compares equal to its class, so "e == SomeError"
        # would silently skip every warning below.
        message = str(e)
        if isinstance(e, ssl.CertificateError):
            click.echo("Warning the source: " + source.nick
                       + " is unsafe: Hostname does not match name on SSL certificate")
        elif (isinstance(e, aiohttp.errors.ClientOSError)
                and "[[SSL: CERTIFICATE_VERIFY_FAILED" in message):
            click.echo("Warning the source: " + source.nick
                       + " is unsafe: The ssl certificate has expired")
        elif (isinstance(e, aiohttp.errors.ClientOSError)
                and "[SSL: EXCESSIVE_MESSAGE_SIZE]" in message):
            click.echo("Warning the source: " + source.nick
                       + " is unsafe: source has sent an invalid response")
        else:
            logger.debug(e)
        # No response object exists on any failure path, so every one of
        # them must stop here instead of falling through to the status
        # checks below.
        return []

    if response.status == 200:
        tweets = parse_tweets(content.splitlines(), source)

        if cache:
            last_modified_header = response.headers.get("Last-Modified")
            if last_modified_header:
                logger.debug("{} returned 200 and Last-Modified header - adding content to cache".format(source.url))
                cache.add_tweets(source.url, last_modified_header, tweets)
            else:
                logger.debug("{} returned 200 but no Last-Modified header - can’t cache content".format(source.url))
        else:
            logger.debug("{} returned 200".format(source.url))

        return sorted(tweets, reverse=True)[:limit]

    if response.status == 301:
        # 301 Moved Permanently: re-point the source at the new URL and
        # carry the cached tweets over. Without a Location header there is
        # nothing to migrate to, so the generic handling below applies.
        new_url = response.headers.get("Location")
        if new_url:
            # Separate names keep the ``cache`` parameter untouched.
            stored_cache = Cache.discover()
            conf = Config.discover()
            tweets = stored_cache.get_tweets(source.url)
            conf.remove_source_by_nick(source.nick)
            conf.add_source(Source(source.nick, new_url))
            # NOTE(review): add_tweet(url, 0, tweet) is kept exactly as the
            # original called it; confirm Cache provides this method.
            for tweet in tweets:
                stored_cache.add_tweet(new_url, 0, tweet)
            # Return a list like every other branch rather than an implicit
            # None (assumes get_tweets returns a list of tweets, as the
            # fallback branches of this function already rely on).
            return sorted(tweets, reverse=True)[:limit]

    if response.status == 410 and is_cached:
        # 410 Gone:
        # The resource requested is no longer available,
        # and will not be available again.
        logger.debug("{} returned 410 - deleting cached content".format(source.url))
        cache.remove_tweets(source.url)
        return []

    if is_cached:
        logger.debug("{} returned {} - using cached content".format(source.url, response.status))
        return cache.get_tweets(source.url, limit)

    logger.debug("{} returned {}".format(source.url, response.status))
    return []