Exemplo n.º 1
0
 def run(self) -> None:
     """Idify either a results directory or Tweets read from stdin.

     When ``self.in_dir`` is set, the batch results stored there are
     idified into ``self.out_dir`` (or back into ``self.in_dir`` if no
     output directory is set). Otherwise every line of stdin is parsed as
     a JSON-encoded Tweet and its ID is written to stdout, one per line.
     """
     if not self.in_dir:
         # Streaming mode: one JSON document per input line.
         for raw_line in sys.stdin:
             tweet = Tweet(json.loads(raw_line))
             sys.stdout.write(str(tweet.id) + "\n")
         return

     stored_results = BatchResults(self.in_dir)
     # Fall back to the input directory when no output directory is set.
     stored_results.idify(self.out_dir or self.in_dir)
Exemplo n.º 2
0
def _assert_results_dir_structure(
    results_dir: Path,
    batch_entries: Sequence[BatchEntry],
    *,
    allow_empty: bool = False,
) -> None:
    """Assert that ``results_dir`` holds completed results for ``batch_entries``.

    Checks that the directory exists, that the entries loaded from it match
    the given (non-empty) ``batch_entries`` by ID, and that every loaded
    entry completed in the past without an exception. For each entry, the
    number of Tweets must not exceed the request's ``max_tweets`` and the
    lower-cased query must occur in the lower-cased JSON dump of each Tweet.

    Args:
        results_dir: Directory the batch results were written to.
        batch_entries: The entries expected to be found in ``results_dir``.
        allow_empty: If false, every entry must have at least one Tweet.
    """
    assert results_dir.exists()
    assert len(batch_entries) != 0

    stored_results = BatchResults(results_dir)

    # BatchEntry is not hashable, so sets are not an option; compare
    # ID-keyed mappings instead.
    expected_by_id = {entry.id: entry for entry in batch_entries}
    stored_by_id = {entry.id: entry for entry in stored_results}
    assert expected_by_id == stored_by_id

    for entry in stored_results:
        assert entry.completed_at is not None
        assert entry.completed_at < datetime.now()
        assert entry.exception is None

        found_tweets = list(stored_results.tweets(entry))
        assert allow_empty or len(found_tweets) != 0

        assert checked_cast(int, entry.request.max_tweets) >= len(found_tweets)
        for tweet in found_tweets:
            # Case-insensitive check that the query text appears somewhere
            # in the Tweet's JSON representation.
            needle = checked_cast(Search, entry.request).query.lower()
            haystack = json.dumps(tweet.to_json()).lower()
            assert needle in haystack
Exemplo n.º 3
0
 def run(self) -> None:
     """Unidify either a results directory or Tweet IDs read from stdin.

     When ``self.in_dir`` is set, the batch results stored there are
     unidified into ``self.out_dir`` (or back into ``self.in_dir`` if no
     output directory is set). Otherwise each stripped line of stdin is
     treated as a Tweet ID, the IDs are passed to ``statuses_lookup``, and
     every Tweet that is not ``None`` is written to stdout as one JSON
     document per line.
     """
     if not self.in_dir:
         # Lazily turn stdin lines into Tweet IDs.
         tweet_ids = (TweetId(line.strip()) for line in sys.stdin)
         for tweet in statuses_lookup(tweet_ids, self.settings.twitter_api):
             if tweet is None:
                 # The lookup yielded no Tweet for this ID; emit nothing.
                 continue
             sys.stdout.write(json.dumps(tweet.to_json()) + "\n")
         return

     stored_results = BatchResults(self.in_dir)
     # Fall back to the input directory when no output directory is set.
     stored_results.unidify(
         self.settings.twitter_api, self.out_dir or self.in_dir
     )
Exemplo n.º 4
0
def test_unidify_fail_and_restart(
    requests: Iterable[Request],
    settings: NastySettings,
    monkeypatch: MonkeyPatch,
    tmp_path: Path,
) -> None:
    """Check that a failed unidify run can be repeated and then completes.

    The batch is executed and idified first. The status lookup is then
    replaced by a mock that lacks one Tweet, which makes unidify raise a
    ``KeyError`` and leaves fewer entries than the batch in the output
    directory. After swapping in a mock that knows all Tweets, unidifying
    into the same directory must yield every entry and every original Tweet.
    """
    idify_dir = tmp_path / "idify"
    unidify_dir = tmp_path / "unidify"

    batch = Batch()
    for req in requests:
        batch.append(req)
    results = batch.execute()
    assert results is not None

    # All Tweets of the executed batch, keyed by Tweet ID.
    tweets = {
        tweet.id: tweet for entry in results for tweet in results.tweets(entry)
    }

    # Same mapping minus one Tweet; pop() raises if that ID is not present.
    withheld_id = TweetId("1115690615612825601")
    tweets_truncated = dict(tweets)
    tweets_truncated.pop(withheld_id)

    idified = results.idify(idify_dir)
    assert idified is not None

    # First attempt: the mocked lookup only knows the truncated mapping.
    monkeypatch.setattr(
        nasty.batch.batch_results,
        nasty.batch.batch_results.statuses_lookup.__name__,  # type: ignore
        _mock_statuses_lookup(tweets_truncated),
    )

    # The KeyError must propagate, because a Tweet is missing from
    # tweets_truncated.
    with pytest.raises(KeyError):
        idified.unidify(settings.twitter_api, unidify_dir)
    partial = BatchResults(unidify_dir)
    assert len(batch) > len(partial)

    # Second attempt: the mocked lookup knows every Tweet.
    monkeypatch.setattr(
        nasty.batch.batch_results,
        nasty.batch.batch_results.statuses_lookup.__name__,  # type: ignore
        _mock_statuses_lookup(tweets),
    )

    restored = idified.unidify(settings.twitter_api, unidify_dir)
    assert restored is not None
    assert len(batch) == len(restored)

    restored_tweets = {
        tweet.id: tweet for entry in restored for tweet in restored.tweets(entry)
    }
    assert tweets == restored_tweets
Exemplo n.º 5
0
def _assert_tweet_texts(results: BatchResults) -> None:
    """Assert that the first entry of ``results`` has the three expected texts.

    The texts of the Tweets belonging to ``results[0]`` must equal the
    hard-coded list below, in the same order.
    """
    expected_texts = [
        (
            "We encourage submissions of new, previously, or concurrently published "
            "research. The event should be a forum for researchers to exchange ideas, "
            "discuss work, and get feedback. We hope you'll consider submitting your work."
        ),
        (
            "We'll have talks from research leaders on the latest advances in NLP. "
            "@NandoDF will be giving the keynote and more speakers will be announced soon. "
            "https://t.co/SB3URxn6ab"
        ),
        (
            "Registration will open soon. In the meantime, we'll hope you'll save the date "
            "and consider joining us for what should be a fun day of listening to "
            "stimulating talks, mingling with like-minded people, exchanging ideas, and "
            "maybe even striking up a collaboration."
        ),
    ]
    actual_texts = [tweet.text for tweet in results.tweets(results[0])]
    assert expected_texts == actual_texts
Exemplo n.º 6
0
def _assert_tweet_ids(results: BatchResults) -> None:
    """Assert that the first entry of ``results`` has the three expected IDs.

    The Tweet IDs belonging to ``results[0]`` must equal the hard-coded
    list below, in the same order.
    """
    expected_ids = [
        TweetId(raw_id)
        for raw_id in (
            "1115690002233556993",
            "1115690615612825601",
            "1115691710657499137",
        )
    ]
    actual_ids = list(results.tweet_ids(results[0]))
    assert expected_ids == actual_ids