Пример #1
0
def get_local_tweets(source, limit):
    """Read the file at ``source.file`` and return its newest tweets.

    :param source: object exposing a ``file`` path; it is also handed to
        ``parse_tweets`` together with the raw lines.
    :param limit: maximum number of tweets to return.
    :returns: at most ``limit`` parsed tweets sorted in descending order,
        or an empty list when the file is missing or not readable.
    """
    try:
        with open(source.file, "r") as handle:
            raw_lines = handle.readlines()
    except (FileNotFoundError, PermissionError) as err:
        # A missing or unreadable file is not fatal: log it and report no tweets.
        logger.debug(err)
        return []

    newest_first = sorted(parse_tweets(raw_lines, source), reverse=True)
    return newest_first[:limit]
Пример #2
0
def retrieve_file(client, source, limit, cache):
    """Fetch the feed at ``source.url`` and return its newest tweets.

    The function delegates to the HTTP client with ``yield from``, so it is
    a generator and has to be driven by the caller's scheduler.

    :param client: object whose ``get(url, headers=...)`` result can be
        delegated to with ``yield from`` and produces a response exposing
        ``status``, ``headers`` and ``text()``.
    :param source: object with a ``url`` attribute; also passed to
        ``parse_tweets``.
    :param limit: maximum number of tweets to return.
    :param cache: optional cache object; a falsy value disables caching.
    :returns: on HTTP 200, at most ``limit`` parsed tweets sorted in
        descending order; otherwise the cached tweets for the URL when a
        cache entry exists, or an empty list.
    """
    is_cached = cache.is_cached(source.url) if cache else None
    # Only send a conditional request when there is cached content to fall
    # back on.
    headers = {
        "If-Modified-Since": cache.last_modified(source.url)
    } if is_cached else {}

    try:
        response = yield from client.get(source.url, headers=headers)
        content = yield from response.text()
    except Exception as e:
        # Best effort: any failure while fetching degrades to cached content
        # (if available) instead of propagating.
        if is_cached:
            logger.debug("{0}: {1} - using cached content".format(
                source.url, e))
            return cache.get_tweets(source.url, limit)
        else:
            logger.debug("{0}: {1}".format(source.url, e))
            return []

    if response.status == 200:
        # splitlines() copes with "\r\n" line endings and does not produce a
        # trailing empty entry, unlike split("\n").
        tweets = parse_tweets(content.splitlines(), source)

        if cache:
            last_modified_header = response.headers.get("Last-Modified")
            if last_modified_header:
                logger.debug(
                    "{0} returned 200 and Last-Modified header - adding content to cache"
                    .format(source.url))
                cache.add_tweets(source.url, last_modified_header, tweets)
            else:
                logger.debug(
                    "{0} returned 200 but no Last-Modified header - can’t cache content"
                    .format(source.url))
        else:
            logger.debug("{0} returned 200".format(source.url))

        return sorted(tweets, reverse=True)[:limit]

    elif response.status == 410 and is_cached:
        # 410 Gone:
        # The resource requested is no longer available,
        # and will not be available again.
        logger.debug("{0} returned 410 - deleting cached content".format(
            source.url))
        cache.remove_tweets(source.url)
        return []

    elif is_cached:
        # Any other status (including a 304 answer to the conditional
        # request) is served from the cache.
        logger.debug("{0} returned {1} - using cached content".format(
            source.url, response.status))
        return cache.get_tweets(source.url, limit)

    else:
        logger.debug("{0} returned {1}".format(source.url, response.status))
        return []
Пример #3
0
def test_parse_tweets():
    """Feed several raw tweet lines to ``parse_tweets`` and count the result."""
    feed = Source("foo", "bar")
    lines = [
        "2016-02-08T00:00:00\tHallo",
        "2016-02-08T00:00:00\tBar\n",
        "2016-02-08T00:00:00\tFoo\n",
        "3000-02-08T00:00:00\tHallo\n",
    ]
    # NOTE(review): four lines go in and three are expected back; presumably
    # the line dated in the year 3000 is rejected - confirm against parse_tweets.
    assert len(parse_tweets(lines, feed)) == 3
Пример #4
0
def retrieve_file(source, limit, timeout):
    """Download the feed at ``source.url`` and return its newest tweets.

    The function delegates to aiohttp with ``yield from``, so it is a
    generator and has to be driven by the caller's scheduler.

    :param source: object with a ``url`` attribute; also passed to
        ``parse_tweets``.
    :param limit: maximum number of tweets to return.
    :param timeout: value handed to ``aiohttp.Timeout`` to bound the fetch.
    :returns: at most ``limit`` parsed tweets sorted in descending order, or
        an empty list when the request fails, times out, or does not
        answer with HTTP 200.
    """
    try:
        # Both the request and the body download happen inside the timeout
        # block, so a server that stalls while sending the body is bounded too.
        with aiohttp.Timeout(timeout):
            response = yield from aiohttp.get(source.url)
            content = yield from response.text()
    except Exception as e:
        # Best effort: a failing source must not abort the caller.
        logger.debug(e)
        return []

    if response.status != 200:
        logger.debug("{} returned {}".format(source.url, response.status))
        return []

    tweets = parse_tweets(content.splitlines(), source)
    return sorted(tweets, reverse=True)[:limit]
Пример #5
0
def retrieve_file(source, limit, timeout):
    """Fetch ``source.url`` over HTTP and return the newest parsed tweets.

    Uses ``yield from`` to delegate to aiohttp, which makes this a generator
    that the caller's scheduler must drive.

    :param source: object with a ``url`` attribute; also passed to
        ``parse_tweets``.
    :param limit: maximum number of tweets to return.
    :param timeout: value handed to ``aiohttp.Timeout`` to bound the fetch.
    :returns: at most ``limit`` parsed tweets sorted in descending order;
        an empty list on any fetch error, timeout, or non-200 status.
    """
    try:
        with aiohttp.Timeout(timeout):
            response = yield from aiohttp.get(source.url)
            # Read the body while the timeout is still armed; otherwise a
            # slow body transfer could block without any limit.
            content = yield from response.text()
    except Exception as e:
        # Deliberately broad: one broken source should only yield no tweets.
        logger.debug(e)
        return []

    if response.status == 200:
        tweets = parse_tweets(content.splitlines(), source)
        return sorted(tweets, reverse=True)[:limit]

    # Record why nothing is returned instead of dropping the response silently.
    logger.debug("{} returned {}".format(source.url, response.status))
    return []
Пример #6
0
def retrieve_file(client, source, limit, cache):
    """Fetch the feed at ``source.url``, falling back to ``cache`` on failure.

    Delegates to the HTTP client with ``yield from``, so this is a generator
    that the caller's scheduler must drive.

    :param client: object whose ``get(url, headers=...)`` result can be
        delegated to with ``yield from`` and produces a response exposing
        ``status``, ``headers`` and ``text()``.
    :param source: object with a ``url`` attribute; also passed to
        ``parse_tweets``.
    :param limit: maximum number of tweets to return.
    :param cache: optional cache object; a falsy value disables caching.
    :returns: on HTTP 200, at most ``limit`` parsed tweets sorted in
        descending order; otherwise cached tweets when a cache entry exists,
        or an empty list.
    """
    is_cached = cache.is_cached(source.url) if cache else None
    if is_cached:
        # Ask the server to skip the body if nothing changed since caching.
        headers = {"If-Modified-Since": cache.last_modified(source.url)}
    else:
        headers = {}

    try:
        response = yield from client.get(source.url, headers=headers)
        content = yield from response.text()
    except Exception as e:
        # Any fetch failure degrades to cached content or to no tweets.
        if not is_cached:
            logger.debug(e)
            return []
        logger.debug("{}: {} - using cached content".format(source.url, e))
        return cache.get_tweets(source.url, limit)

    status = response.status

    if status != 200:
        if status == 410 and is_cached:
            # 410 Gone: the resource will not come back, so drop its cache entry.
            logger.debug("{} returned 410 - deleting cached content".format(source.url))
            cache.remove_tweets(source.url)
            return []

        if is_cached:
            logger.debug("{} returned {} - using cached content".format(source.url, status))
            return cache.get_tweets(source.url, limit)

        logger.debug("{} returned {}".format(source.url, status))
        return []

    tweets = parse_tweets(content.splitlines(), source)

    if not cache:
        logger.debug("{} returned 200".format(source.url))
    else:
        last_modified_header = response.headers.get("Last-Modified")
        if not last_modified_header:
            logger.debug("{} returned 200 but no Last-Modified header - can’t cache content".format(source.url))
        else:
            logger.debug("{} returned 200 and Last-Modified header - adding content to cache".format(source.url))
            cache.add_tweets(source.url, last_modified_header, tweets)

    return sorted(tweets, reverse=True)[:limit]
Пример #7
0
def retrieve_file(client, source, limit, cache):
    """Fetch the feed at ``source.url``, warning about TLS problems and
    following permanent redirects by rewriting the stored source.

    Delegates to the HTTP client with ``yield from``, so this is a generator
    that the caller's scheduler must drive.

    :param client: object whose ``request("get", url, headers=...,
        allow_redirects=False)`` result can be delegated to with
        ``yield from`` and produces a response exposing ``status``,
        ``headers`` and ``text()``.
    :param source: object with ``url`` and ``nick`` attributes; also passed
        to ``parse_tweets``.
    :param limit: maximum number of tweets to return.
    :param cache: optional cache object; a falsy value disables caching.
    :returns: a list of tweets on every path: parsed tweets on HTTP 200,
        cached tweets when falling back to the cache or after a 301, and an
        empty list otherwise.
    """
    is_cached = cache.is_cached(source.url) if cache else None
    headers = {"If-Modified-Since": cache.last_modified(source.url)} if is_cached else {}

    try:
        # Redirects are not followed automatically so that a 301 can be
        # handled explicitly below.
        response = yield from client.request("get", source.url, headers=headers, allow_redirects=False)
        content = yield from response.text()
    except Exception as e:
        if is_cached:
            logger.debug("{}: {} - using cached content".format(source.url, e))
            return cache.get_tweets(source.url, limit)

        # isinstance() is required here: comparing an exception instance to
        # an exception class with == is always False, which made the
        # warnings below unreachable.
        if isinstance(e, ssl.CertificateError):
            click.echo("Warning the source: "+source.nick+" is unsafe: Hostname does not match name on SSL certificate")
            return []

        if isinstance(e, aiohttp.errors.ClientOSError):
            message = str(e)
            # NOTE(review): the doubled "[[" is kept from the original;
            # presumably it matches the client's bracket-wrapped error
            # text - confirm against a real expired-certificate failure.
            if "[[SSL: CERTIFICATE_VERIFY_FAILED" in message:
                click.echo("Warning the source: "+source.nick+" is unsafe: The ssl certificate has expired")
                return []
            if "[SSL: EXCESSIVE_MESSAGE_SIZE]" in message:
                click.echo("Warning the source: "+source.nick+" is unsafe: source has sent an invalid response")
                # Must return: there is no response object to inspect below.
                return []

        # Every other failure (including unrecognised ClientOSError
        # messages) is only logged.
        logger.debug(e)
        return []

    if response.status == 200:
        tweets = parse_tweets(content.splitlines(), source)

        if cache:
            last_modified_header = response.headers.get("Last-Modified")
            if last_modified_header:
                logger.debug("{} returned 200 and Last-Modified header - adding content to cache".format(source.url))
                cache.add_tweets(source.url, last_modified_header, tweets)
            else:
                logger.debug("{} returned 200 but no Last-Modified header - can’t cache content".format(source.url))
        else:
            logger.debug("{} returned 200".format(source.url))

        return sorted(tweets, reverse=True)[:limit]

    elif response.status == 301:
        # 301 Moved Permanently: re-register the source under its new URL.
        # Read the target before touching the configuration so that a
        # missing header cannot leave the source removed but not re-added.
        new_url = response.headers.get("Location")
        if not new_url:
            logger.debug("{} returned 301 without a Location header".format(source.url))
            return cache.get_tweets(source.url, limit) if is_cached else []

        logger.debug("{} returned 301 - moving source to {}".format(source.url, new_url))
        # NOTE(review): this replaces the ``cache`` argument with a freshly
        # discovered one, as the original did - confirm that is intended.
        cache = Cache.discover()
        conf = Config.discover()
        tweets = cache.get_tweets(source.url)

        conf.remove_source_by_nick(source.nick)
        conf.add_source(Source(source.nick, new_url))
        for tweet in tweets:
            cache.add_tweet(new_url, 0, tweet)

        # Return a list like every other branch instead of an implicit None.
        return sorted(tweets, reverse=True)[:limit]

    elif response.status == 410 and is_cached:
        # 410 Gone:
        # The resource requested is no longer available,
        # and will not be available again.
        logger.debug("{} returned 410 - deleting cached content".format(source.url))
        cache.remove_tweets(source.url)
        return []

    elif is_cached:
        logger.debug("{} returned {} - using cached content".format(source.url, response.status))
        return cache.get_tweets(source.url, limit)

    else:
        logger.debug("{} returned {}".format(source.url, response.status))
        return []