Example #1
import urllib.request
from urllib.error import URLError

from bs4 import BeautifulSoup


# count_caps(), tact(), and process() are helpers defined elsewhere in the bot.
def get():
    try:
        response = urllib.request.urlopen("http://news.google.com/news?pz=1&cf=all&ned=us&hl=en&q=kaine&output=rss")
    except URLError as e:
        print(e.reason)
    else:
        html = BeautifulSoup(response.read(), "html.parser")
        items = html.find_all('item')
        for item in items:
            headline = item.title.string
            h_split = headline.split()

            # We don't want to use incomplete headlines
            if "..." in headline:
                continue

            # Try to weed out all-caps headlines
            if count_caps(h_split) >= len(h_split) - 3:
                continue

            # Skip anything too offensive
            if not tact(headline):
                continue

            # Remove attribution string
            if "-" in headline:
                headline = headline.split("-")[:-1]
                headline = ' '.join(headline).strip()

            if process(headline):
                break
            else:
                continue
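
The code above relies on a count_caps() helper that isn't shown in this excerpt. A minimal sketch consistent with the call site (counting the fully upper-case words in a headline so that near-all-caps headlines can be skipped) could look like this; the real implementation may differ:

def count_caps(words):
    # Count the tokens that are entirely upper-case, e.g. "NEWS" or "US".
    return sum(1 for word in words if word.isupper())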
Example #2
# Python 2 version of the previous example.
import urllib2

from bs4 import BeautifulSoup


def get():
    try:
        request = urllib2.Request(
            "http://news.google.com/news?pz=1&cf=all&ned=us&hl=en&output=rss")
        response = urllib2.urlopen(request)
    except urllib2.URLError as e:
        print e.reason
    else:
        html = BeautifulSoup(response.read(), "html.parser")
        items = html.find_all('item')
        for item in items:
            headline = item.title.string
            h_split = headline.split()

            # We don't want to use incomplete headlines
            if "..." in headline:
                continue

            # Try to weed out all-caps headlines
            if count_caps(h_split) >= len(h_split) - 3:
                continue

            # Skip anything too offensive
            if not tact(headline):
                continue

            # Remove attribution string
            if "-" in headline:
                headline = headline.split("-")[:-1]
                headline = ' '.join(headline).strip()

            if process(headline):
                break
            else:
                continue
Example #3
import wordfilter


# tact() and process() are helpers defined elsewhere in the bot.
def filter_tweets(tweets_):
    """Filter out tweets to avoid mentions, offensive content, etc."""
    while tweets_:
        tweet_ = tweets_.pop(0)
        text = tweet_.text
        if not (hasattr(tweet_, "retweeted_status") or
                tweet_.in_reply_to_status_id or
                tweet_.in_reply_to_screen_name or
                tweet_.truncated or
                '@' in text or
                'RT' in text or
                '#' in text or wordfilter.blacklisted(text) or
                not tact(text)):
            if process(text):
                break
            else:
                continue
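
A hypothetical call site for filter_tweets(), assuming a tweepy API handle named api has already been authenticated and that process() posts or otherwise consumes the chosen tweet:

timeline = list(api.home_timeline(count=50))
filter_tweets(timeline)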
Example #4
import urllib2

import wordfilter
from bs4 import BeautifulSoup


# count_caps(), tact(), and process() are helpers defined elsewhere in the bot.
def get_news():
    try:
        request = urllib2.Request(
            "http://news.google.com/news?pz=1&cf=all&ned=us&hl=en&output=rss")
        response = urllib2.urlopen(request)
    except urllib2.URLError as e:
        print(e.reason)
    else:
        html = BeautifulSoup(response.read(), "html.parser")
        items = html.find_all('item')
        for item in items:
            headline = item.title.string
            h_split = headline.split()

            # Skip incomplete headlines
            if "..." in headline:
                continue

            # Skip headlines in all caps
            if count_caps(h_split) >= len(h_split) - 3:
                continue

            # Filter for offensive words
            if wordfilter.blacklisted(headline):
                continue

            # Second offensiveness check, this time with tact()
            if not tact(headline):
                continue

            # Remove article attributions
            if "-" in headline:
                headline = headline.split("-")[:-1]
                headline = ' '.join(headline).strip()

            if process(headline):
                break
            else:
                continue
Example #5
import random

import markovify
import tweepy
import wordfilter

import offensive
import twitter_login

with open("/home/staeiou/bots/dystopedia/titles.txt", encoding="utf-8") as f:
    deltext = f.read()

deltext = deltext.replace(".", " ")
deltext = deltext.encode('ascii', 'ignore').decode('ascii')

deletion_model = markovify.NewlineText(deltext)

tweet = None
tweets = []
for i in range(250):
    title = deletion_model.make_short_sentence(90)
    if title is not None and not wordfilter.blacklisted(title) and offensive.tact(title):
        tweets.append(title)

tweets = sorted(tweets, key=len, reverse=True)
if tweets:
    # Pick one of the (up to) 25 longest generated titles.
    rand_num = random.randrange(min(25, len(tweets)))
    print(tweets[rand_num])

CONSUMER_KEY = twitter_login.CONSUMER_KEY
CONSUMER_SECRET = twitter_login.CONSUMER_SECRET
ACCESS_TOKEN = twitter_login.ACCESS_TOKEN
ACCESS_TOKEN_SECRET = twitter_login.ACCESS_TOKEN_SECRET

auth = tweepy.OAuthHandler(CONSUMER_KEY, CONSUMER_SECRET)
auth.set_access_token(ACCESS_TOKEN, ACCESS_TOKEN_SECRET)
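
The excerpt authenticates but stops before actually posting. With tweepy's pre-v4 interface, the missing final step would presumably be a single status update; this is a sketch, not the original bot's code:

api = tweepy.API(auth)
if tweets:
    api.update_status(tweets[rand_num])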
Example #6
    def _process_tweet(self, tweet):
        '''
        Translate a tweet and post the translated one.

        tweet:
            The tweet to translate.
        '''
        log_details = [
            ('original-id', tweet.id),
            ('original-url',
             self._get_tweet_url(self._target_user_name, tweet.id)),
            ('original-time', tweet.created_at.isoformat()),
            ('original-text', tweet.full_text),
        ]

        if hasattr(tweet, 'retweeted_status'):
            # Note that retweets with an extra comment don't have retweeted_status, but
            # they have quoted_status, so we don't skip them.
            log_details += [
                ('skipped-because-retweet', True),
            ]
            intermediate_translations = None
            new_tweet = None
        else:
            self._follow_mentions(tweet)

            intermediate_translations = []

            def translation_cb(counter, language, intermediate_text):
                intermediate_translations.append(
                    collections.OrderedDict([
                        ('counter', counter),
                        ('language', language),
                        ('text', intermediate_text),
                    ]))

            sanitized_text = self._sanitize_tweet(tweet)
            equilibrium_reached, sanitized_translated_text = equilibrium.find_equilibrium(
                self._translator, 'en', 'ja', sanitized_text, translation_cb)
            translated_text = self._unsanitize_tweet_text(
                sanitized_translated_text)
            new_tweet = self._post_tweet(translated_text, tweet.id)

            if tweet.full_text != sanitized_text:
                log_details += [
                    ('original-sanitized-text', sanitized_text),
                ]

            log_details += [
                ('translated-id', new_tweet.id),
                ('translated-url',
                 self._get_tweet_url(self._my_user.id, new_tweet.id)),
                ('translated-time', new_tweet.created_at.isoformat()),
            ]

            # Maybe the tweet was shortened or mangled in some other way by Twitter.
            if translated_text != new_tweet.full_text:
                log_details += [
                    ('translated-initial-text', translated_text),
                ]

            log_details += [
                ('translated-text', new_tweet.full_text),
                ('equilibrium-reached', equilibrium_reached),
                ('translator', self._translator.name),
            ]

            # For now we just log about offensiveness.
            # Later I can verify how useful this check is and, if needed, not post the
            # tweets.
            original_offensive = not offensive.tact(tweet.full_text)
            new_offensive = not offensive.tact(translated_text)
            if original_offensive and new_offensive:
                offensiveness = 'both'
            elif original_offensive:
                offensiveness = 'original'
            elif new_offensive:
                offensiveness = 'retranslated'
            else:
                offensiveness = 'none'

            log_details += [
                ('offensiveness', offensiveness),
            ]

        self._last_processed.set_last_processed(tweet.id_str)
        # We save logs after the ID, so there's a chance we actually fail to save logs for
        # this tweet. This is better than retweeting the same thing twice.
        self._log(self._serialize_list_to_ordered_dict(log_details))

        self._log_tweet_json(tweet)
        # new_tweet is None when the original was a retweet and we skipped it.
        if new_tweet is not None:
            self._log_tweet_json(new_tweet)

        if intermediate_translations:
            json_text = self._serialize_json(intermediate_translations)
            extra_name = '{}-{}-translations.json'.format(
                tweet.user.screen_name, tweet.id)
            self._log(json_text, extra_name)
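
equilibrium.find_equilibrium() is not shown in this excerpt. Judging from the call site, it round-trips the text between the two languages until the text stops changing, reporting each intermediate result through the callback. A sketch of that idea, with the translator interface assumed rather than taken from the original module:

def find_equilibrium(translator, lang_a, lang_b, text, callback, max_rounds=10):
    # Translate lang_a -> lang_b -> lang_a until the text reaches a fixed
    # point or we give up. Returns (equilibrium_reached, final_text).
    counter = 0
    for _ in range(max_rounds):
        intermediate = translator.translate(text, lang_a, lang_b)  # assumed interface
        counter += 1
        callback(counter, lang_b, intermediate)
        round_trip = translator.translate(intermediate, lang_b, lang_a)
        counter += 1
        callback(counter, lang_a, round_trip)
        if round_trip == text:
            return True, round_trip
        text = round_trip
    return False, text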