Пример #1
0
    except Exception as e:
        print("ERROR:" + str(e))


if __name__ == "__main__":
    print("------------------------------------------ Iniciando. Hora: " +
          str(datetime.now()) + " -----------------------")

    browser = mechanicalsoup.StatefulBrowser(soup_config={'features': 'lxml'})
    scrapeCuprum(browser)
    scrapeProvida(browser)
    scrapeSVS(browser)

    print(guardado)
    print(MsjGuardado)

    if guardado:
        token = "895064451677736960-JCDhYCZ7wIztWljb9Wp36W2HXl9ZW3F"
        token_secret = "5Fbpzl7g61ZEWFfXRxvU9Jk8en5TI8piCn7p0okOSdvX8"
        consumer_key = "2b1fcHhGndhj3ocgZW1goGkFQ"
        consumer_secret = "cbxMnFMQvbTPpd4OGvZyOkiAGQARuJEIf85JcxE6RkcYgUZas3"

        t = Twitter(
            auth=OAuth(token, token_secret, consumer_key, consumer_secret))

        for msj in MsjGuardado:
            t.statuses.update(status=msj)

    print("------------------------------------------ Terminó: " +
          str(datetime.now()) + " -----------------------")
Пример #2
0
        users = sorted(set(users) | set(new_users))
        json.dump(users, fp)
    return users


def update_links(new_links, filename):
    """Merge *new_links* into the CSV file at *filename*.

    The file must start with a ``source,target,datetime`` header followed
    by one comma-separated link per line.  Existing links are read back,
    unioned with *new_links*, de-duplicated, and rewritten in sorted
    order (no trailing newline).  Raises ``FileNotFoundError`` if the
    file does not exist.
    """
    with open(filename, 'r') as fp:
        # Skip the header line and ignore blank lines: previously a file
        # ending in a newline produced a bogus ('',) entry that was then
        # written back as an empty row.
        links = [tuple(l.split(","))
                 for l in fp.read().split("\n")[1:] if l]
    with open(filename, 'w') as fp:
        links = sorted(set(links) | set(new_links))
        fp.write("source,target,datetime\n")
        fp.write("\n".join([",".join(l) for l in links]))


# NOTE: Python 2 code (print statements).  Collector loop: each pass
# re-authenticates, searches both conference hashtags and records the
# tweeting users.  The rest of the loop body is outside this excerpt.
while True:
    t = Twitter(auth=OAuth(OAUTH_TOKEN, OAUTH_TOKEN_SECRET, CONSUMER_KEY,
                           CONSUMER_SECRET))

    # Get users that tweeted with #netsci2018 hashtag
    print "Searching for tweets with hashtags '#NetSci2018' and '#NetSci18'"
    collection_tweets = []
    for hashtag in hashtags:
        collection_tweets += search_tweets(hashtag)['statuses']
    print "Loaded %d tweets" % len(collection_tweets),

    print "from",
    # Keep only original tweets (text not starting with "RT"), persist
    # the authors via update_users(), then de-duplicate and sort.
    users = sorted(
        set(
            update_users([
                tweet['user']['screen_name'] for tweet in collection_tweets
                if tweet['text'][:2] != "RT"
            ])))  # Everybody who has tweeted
Пример #3
0
from twitter import Twitter, OAuth
from os import environ
from pprint import pprint

# Pull the four OAuth credentials from the environment; a KeyError here
# means the deployment is missing one of these variables.
consumer_key = environ['TWITTER_CONSUMER_KEY']
consumer_secret = environ['TWITTER_CONSUMER_SECRET']
access_token = environ['TWITTER_ACCESS_TOKEN']
access_secret = environ['TWITTER_ACCESS_SECRET']

# Build the credential object first, then the client, then post a single
# status and pretty-print the API's response.
auth = OAuth(access_token, access_secret, consumer_key, consumer_secret)
t = Twitter(auth=auth)
statusUpdate = t.statuses.update(status='Hallo, word!')
pprint(statusUpdate)
Пример #4
0
# NOTE: Python 2 code (`execfile`, print statement).
from twitter import Twitter, OAuth, TwitterHTTPError, TwitterStream
try:
    import json
except ImportError:
    import simplejson as json
# Load credentials by executing config.py into a plain dict.
# SECURITY NOTE(review): execfile runs arbitrary code from config.py.
config = {}
execfile("config.py", config)
oauth = OAuth(config["ACCESS_TOKEN"], config["ACCESS_SECRET"],
              config["CONSUMER_KEY"], config["CONSUMER_SECRET"])
twitter_stream = TwitterStream(auth=oauth)

# Stream tweets mentioning @twitter.  NOTE(review): `country` and
# `countrycode` are passed straight through as request parameters --
# confirm the streaming endpoint actually honours them.
iterator = twitter_stream.statuses.filter(track="@twitter",
                                          country="United States",
                                          countrycode="US")
tweet_count = 882
# Dump raw tweets as JSON lines until the quota is exhausted.
for tweet in iterator:
    tweet_count -= 1
    print json.dumps(tweet)
    if tweet_count <= 0:
        break
Пример #5
0
except ImportError:
    import simplejson as json

from twitter import Twitter, OAuth, TwitterHTTPError, TwitterStream

# Total number of tweets to collect from the sample stream.
NUM_TWEETS = 5000000
k = 1

# Credentials come from the environment; os.environ.get returns None
# for unset variables, which only surfaces as a failure later at auth.
ACCESS_TOKEN = os.environ.get('TWITTER_ACCESS_TOKEN')
ACCESS_SECRET = os.environ.get('TWITTER_ACCESS_SECRET')
API_KEY = os.environ.get('TWITTER_API_KEY')
API_SECRET = os.environ.get('TWITTER_API_SECRET')

print("starting")

oauth = OAuth(ACCESS_TOKEN, ACCESS_SECRET, API_KEY, API_SECRET)

twitter_stream = TwitterStream(auth=oauth)

# Iterator over the random public-status sample stream.
tweets = twitter_stream.statuses.sample()

allTweets = []

print("getting tweets")

tweetTotal = NUM_TWEETS
'''
Gets tweets from Twitter, creates dictionaries from them, and writes them to file as csv
'''

for tweet in tweets:
Пример #6
0
 def connect(self):
     """Create the authenticated API client and store it on self.client.

     Credential attributes (token, token_key, con_secret,
     con_secret_key) must be set on the instance beforehand; they are
     passed in the wrapper's OAuth order (token, token secret,
     consumer key, consumer secret).
     """
     self.client = Twitter(auth=OAuth(self.token, self.token_key,
                                      self.con_secret, self.con_secret_key))
Пример #7
0
# NOTE: Python 2 code (print statement below).
from twitter.api import Twitter, TwitterError
from twitter.oauth import OAuth, read_token_file
from twitter.oauth_dance import oauth_dance
from twitter.auth import NoAuth
from twitter.util import Fail, err, expand_line, parse_host_list
from twitter.follow import lookup

# SECURITY NOTE(review): hard-coded app credentials in source.
CONSUMER_KEY='XLVBlYhYqJNAPPD5OEQ'
CONSUMER_SECRET='EUDfuBcgB37Dn34Vo6tSaKcBKQESQOW1M6PIMQ'
# The user token is read from ~/.twitter-archiver_oauth; the oauth_dance
# bootstrap that would create that file is commented out below.
oauth_filename = (os.getenv("HOME", "") + os.sep
                  + ".twitter-archiver_oauth")
# if not os.path.exists(oauth_filename):
#     oauth_dance("Twitter-Archiver", CONSUMER_KEY, CONSUMER_SECRET,
#                 oauth_filename)
oauth_token, oauth_token_secret = read_token_file(oauth_filename)
auth = OAuth(oauth_token, oauth_token_secret, CONSUMER_KEY,
             CONSUMER_SECRET)

t = Twitter(auth=auth, api_version='1.1', domain='api.twitter.com')
# print twitter.statuses.home_timeline()

print argv[1]

# read the file

def main():
    json_file = open(argv[1], 'r')
    for line in json_file.readlines():
        item = json.loads(line)
        if item.has_key('asker'):
            twitter_name = item['asker']['twitter_username']
        if item.has_key('answerer'):
Пример #8
0
#              also collects user information from these tweets and adds them to TwitterUsers.json
#              Finally adds 5 tweet per user creating a nice pool of mixed tweets.
#######################################################################################################################################

import json
from utils import ConfigProvider, FileContentLoader
from twitter import OAuth, Twitter, TwitterStream

if __name__ == '__main__':

    AllTweets = None
    AllUsers = {}
    AllNonEventTweets = None

    # Initiate the connection to Twitter Streaming / search API
    # NOTE(review): "ConsumberSecret" looks like a typo for
    # "ConsumerSecret", but it must match the attribute actually
    # declared on ConfigProvider -- confirm before renaming.
    oauth = OAuth(ConfigProvider.AccessToken, ConfigProvider.AccessSecret,
                  ConfigProvider.ConsumerKey, ConfigProvider.ConsumberSecret)
    twitter_stream = TwitterStream(auth=oauth)
    twitter = Twitter(auth=oauth)

    # Load life events detail and collect tweets for each life event
    LifeEventsList = FileContentLoader.LifeEventsList()
    for LifeEvent in LifeEventsList["LifeEventList"]:
        # Collect 10 tweets for current life event
        # print "Processing tweets for life event:" , LifeEvent["Topic"]
        EventTweets = twitter.search.tweets(q=LifeEvent["Event"],
                                            lang='en',
                                            count=10)
        # Collect 5 tweets per stem word for this life event category
        for StemWord in LifeEvent["StemWords"]:
            EventStemWordTweets = twitter.search.tweets(q=StemWord,
                                                        lang='en',
Пример #9
0
def getSparkSessionInstance(sparkConf):
	"""Return the process-wide singleton SparkSession, creating it
	(with Hive support) from *sparkConf* on first use."""
	module_scope = globals()
	if 'sparkSessionSingletonInstance' not in module_scope:
		builder = SparkSession.builder.config(conf=sparkConf)
		module_scope['sparkSessionSingletonInstance'] = (
			builder.enableHiveSupport().getOrCreate())
	return module_scope['sparkSessionSingletonInstance']

def savetweets():
    """Run a Spark streaming job (600-second batches) over the Kafka
    "test" topic, handing each RDD to receiveTweets, then block until
    the streaming context terminates.

    Relies on the module-level globals `sc` (SparkContext) and
    `producer` (KafkaProducer) set up under __main__ below.
    """
    ssc = StreamingContext(sc, 600)
    kvs = KafkaUtils.createDirectStream(ssc, ["test"], {"metadata.broker.list": "localhost:9092"})
    kvs.foreachRDD(receiveTweets)
    producer.flush()
    ssc.start()
    ssc.awaitTermination()

def receiveTweets(time, rdd):
    """foreachRDD callback: forward up to 20000 sampled tweets that
    contain an 'extTweet' field to the Kafka 'savedata' topic.

    NOTE(review): both parameters (`time`, `rdd`) are unused -- the
    function reads from the module-level live sample stream instead of
    the Kafka RDD it was given.  Confirm this is intentional.
    """
    iterator = twitter_stream.statuses.sample()
    count = 0
    for tweet in iterator:
        if 'extTweet' in tweet:
            # ascii-encode the JSON payload for the Kafka producer.
            producer.send('savedata', bytes(json.dumps(tweet, indent=6), "ascii"))
            count+=1
            if(count>=20000):
                break

if __name__ == "__main__":
    sc = SparkContext(appName="Project 2: Store Tweets")
    credentials = read_credentials()
    oauth = OAuth(credentials['ACCESS_TOKEN'], credentials['ACCESS_SECRET'], credentials['CONSUMER_KEY'], credentials['CONSUMER_SECRET'])
    twitter_stream = TwitterStream(auth=oauth)
    producer = KafkaProducer(bootstrap_servers='localhost:9092')
    savetweets()
Пример #10
0
 def __init__(self, access_token, access_token_secret, consumer_key, consumer_secret):
     """Build the authenticated client on self.twitter; the wrapper's
     retry=10 makes failed API calls retry up to 10 times."""
     self.twitter = Twitter(
         auth=OAuth(access_token, access_token_secret, consumer_key, consumer_secret), retry=10)
Пример #11
0
from twitter import Twitter
from twitter import OAuth

# SECURITY NOTE(review): hard-coded account credentials below; move them
# to environment variables or a config file.
# OAuth argument order: token, token_secret, consumer_key, consumer_secret.
t = Twitter(auth=OAuth(
    '1539088022-I8k9xAUGjAfxGfgHQj69YMaTHZcLUsR2Nz9bvst',
    'n44jnA3XPrLV2eXmS8Xqn1SKDj2GDD9WJk432FySfPkt4',
    'jjMY07Ck9Zb41CyL8cWxAcwNl',
    '2ie5wUbzdTR2Y8hpBBeeoQLXc0UO8WFizhdgx9mYFaxyHqIxsG'
    ))
# Sample search; result unused beyond the (commented-out) debug print.
pythonTweets = t.search.tweets(q='#python')
# print(str(pythonTweets).encode('GBK', 'ignore'))

statusUpdate = t.statuses.update(status='Hello, world kkk!')
print(str(statusUpdate).encode('GBK', 'ignore'))
Пример #12
0
"""
Streaming APIs give access to (usually a sample of) all tweets as
they published on Twitter.
"""
# The Streaming API only sends out real-time tweets

# Import JSON to deal with twitter wrapper output
# import json
import tokens as tokens

# Importing twitter wrapper library
# from twitter import Twitter, OAuth, TwitterHTTPError, TwitterStream
from twitter import Twitter, OAuth

oauth = OAuth(tokens.ACCESS_TOKEN, tokens.ACCESS_SECRET, tokens.CONSUMER_KEY,
              tokens.CONSUMER_SECRET)

# Initiate the connection to Twitter REST API
twitter = Twitter(auth=oauth)

# Search for latest tweets about "#nlproc"
# twitter.search.tweets(q='eleição')

# Get all the locations where Twitter provides trends service
# Brazil 23424768
# world_trends = twitter.trends.available(_woeid=23424768)
# Brasilia 455819
# world_trends = twitter.trends.available(_woeid=455819)
# Sao Paulo 455827
# world_trends = twitter.trends.available(_woeid=455827)
Пример #13
0
 def authenticate(self):
     '''
     Build the OAuth credential object from the instance's key/token
     attributes (ACCESS_TOKEN, ACCESS_SECRET, CONSUMER_KEY,
     CONSUMER_SECRET) and store it on self.oauth for later API calls.
     '''
     self.oauth = OAuth(self.ACCESS_TOKEN, self.ACCESS_SECRET,
                        self.CONSUMER_KEY, self.CONSUMER_SECRET)
Пример #14
0
    return t.search.tweets(q='@' + BOT_NAME,
                           result_type='recent',
                           since_id=latest_id)['statuses']


# return the id of the latest tweet mentioning @BOT_NAME
def fetch_latest_id():
    # NOTE(review): raises IndexError when there are no recent mentions
    # ('statuses' comes back empty).  Uses the module-level `t` client.
    return t.search.tweets(q='@' + BOT_NAME, result_type='recent',
                           count=1)['statuses'][0]['id']


if __name__ == '__main__':

    # initialize Twitter connection
    t = Twitter(
        auth=OAuth(OAUTH_TOKEN, OAUTH_SECRET, CONSUMER_KEY, CONSUMER_SECRET))

    # read in the latest id from the last check
    # NOTE(review): fails if the .latest_id state file does not exist.
    f = open('.latest_id', 'r')
    latest_id = f.read().rstrip()
    f.close()

    # check for unseen tweets since the latest id
    results = fetch_unseen_mentions(latest_id)

    # if we got any tweets, reply to them
    # (oldest first, hence reversed(); loop continues past this excerpt)
    if results:
        for tweet in reversed(results):

            tweeter = tweet['user']['screen_name']
            artist = random_artist()
Пример #15
0
def tweet(title, collaborations, url, version=1):
    """
    Announce addition or revision of a HEPData record on Twitter.

    :param title: paper title (may contain LaTeX / binary characters)
    :param collaborations: collaborations credited in the status text
    :param url: public record URL included in the tweet
    :param version: record version; 1 announces "Added", >1 "Revised"
    :return: None
    """
    if USE_TWITTER:

        OAUTH_TOKEN = current_app.config['OAUTH_TOKEN']
        OAUTH_SECRET = current_app.config['OAUTH_SECRET']
        CONSUMER_KEY = current_app.config['CONSUMER_KEY']
        CONSUMER_SECRET = current_app.config['CONSUMER_SECRET']

        if not OAUTH_TOKEN or not OAUTH_SECRET or not CONSUMER_KEY or not CONSUMER_SECRET:
            # log this error
            print("Twitter credentials must be supplied!")
        else:
            twitter = Twitter(auth=OAuth(OAUTH_TOKEN, OAUTH_SECRET,
                                         CONSUMER_KEY, CONSUMER_SECRET))

            cleaned_title = decode_string(
                encode_string(title))  # in case of binary characters in title
            cleaned_title = replace(
                cleaned_title)  # use UnicodeIt to replace LaTeX expressions
            cleaned_title = cleanup_latex(
                cleaned_title)  # remove some remaining LaTeX encodings

            words = len(cleaned_title.split())

            # Try to tweet with complete paper title.
            # If tweet exceeds 280 characters, keep trying with one less word each time.
            tweeted = False
            # BUGFIX: keep our own reference to the last exception.  The
            # `except ... as e` target is unbound outside the except
            # clause in Python 3 (and never bound at all when `words`
            # starts at 0), so the original `print(e.__str__())` below
            # raised NameError instead of reporting the failure.
            last_exc = None
            while words and not tweeted:

                try:

                    if version == 1:
                        status = "Added{0} data on \"{1}\" to {2}".format(
                            get_collaboration_string(collaborations),
                            truncate_string(cleaned_title, words), url)
                    else:
                        status = "Revised{0} data on \"{1}\" at {2}?version={3}".format(
                            get_collaboration_string(collaborations),
                            truncate_string(cleaned_title, words), url,
                            version)

                    twitter.statuses.update(status=status)
                    tweeted = True
                    print("Tweeted: {}".format(status))

                except Exception as e:
                    last_exc = e
                    # It would be nice to get a stack trace here.
                    # NOTE(review): assumes the twitter wrapper's
                    # exception shape (`e.e.code`, `e.response_data`);
                    # other exception types would raise AttributeError
                    # here, as in the original.
                    if e.e.code == 403:
                        error = json.loads(e.response_data.decode('utf8'))
                        if error["errors"][0][
                                "code"] == 186:  # Status is over 140 characters.
                            words = words - 1  # Try again with one less word.
                        else:
                            break
                    else:
                        break

            if not tweeted:
                if last_exc is not None:
                    print(last_exc.__str__())
                print("(P) Failed to post tweet for record {0}".format(url))
Пример #16
0
)
# NOTE(review): this snippet mixes Python 3 style print(...) calls with
# Python 2 print statements (see details() below); it cannot run as-is
# under either interpreter without normalising one of the two styles.
print(
    colored("\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t WELCOME TO TWITTERBOT ",
            color='blue',
            attrs=['bold']))

#-----------------------------------------------------------------------------------------------------------------------
# CREDENTIALS
# SECURITY NOTE(review): hard-coded credentials; move to env/config.
API_KEY = 'tWlZPllDzXGvuRaPYBMu82WjE'
API_SECRET = 'cBWdZn91zmX7LtQI8UczNdFC66TdkPdDHfLS9lVkpEN4cwDJxL'
ACCESS_TOKEN = '410532691-IK0YkbvXH4A0SVrgHiuKHWVpB4Srq9fGhsj1zAmr'
ACCESS_TOKEN_SECRET = 'Ag3H99umpVoaaY51Tzf5p6DIKdugDQuKEfMzkDQoud41H'

#-----------------------------------------------------------------------------------------------------------------------

# Two clients over the same credentials: the `twitter` wrapper and tweepy.
twitter_oauth = OAuth(ACCESS_TOKEN, ACCESS_TOKEN_SECRET, API_KEY, API_SECRET)
twitter = Twitter(auth=twitter_oauth)

oauth = tweepy.OAuthHandler(API_KEY, API_SECRET)
oauth.set_access_token(ACCESS_TOKEN, ACCESS_TOKEN_SECRET)
api = tweepy.API(oauth)

#-----------------------------------------------------------------------------------------------------------------------


#FOR DISPLAYING MY ACCOUNT DETAILS
def details():
    myaccount = api.me()
    print(colored("\n Personal Details", color='blue', attrs=['underline']))
    print 'Name: ' + myaccount.name
    print 'Friends: ' + str(myaccount.friends_count)
Пример #17
0
import pathlib

from twitter import OAuth

import encrypt
import tweepy

# Windows-style paths one level above the CWD for the encrypted env
# file and its decryption key.
env_file = str(pathlib.Path.cwd().parent) + "\\vars.json"
key_path = str(pathlib.Path.cwd().parent) + "\\key.key"

key = encrypt.load_key(key_path)

# Decrypt vars.json into a dict of credential strings.
env_vars = encrypt.decrypt_return_data(env_file, key)

auth = OAuth(env_vars["TWITTER_TOKEN_KEY"], env_vars["TWITTER_TOKEN_SECRET"],
             env_vars["TWITTER_CONSUMER_KEY"],
             env_vars["TWITTER_CONSUMER_SECRET"])


class StreamListener(tweepy.StreamListener):
    """Minimal tweepy listener: print each status and its permalink."""
    def on_status(self, tweet):
        print(f"{tweet.user.name}:{tweet.text}")
        print(f"https://twitter.com/user/status/{tweet.id}")

    def on_error(self, status_code):
        # 420 = rate limited; returning False disconnects the stream.
        if status_code == 420:
            return False
        print("Error Detected")


api = tweepy.API(auth, wait_on_rate_limit=True, wait_on_rate_limit_notify=True)
Пример #18
0
# https://github.com/find-evil/echo_chamber
# https://jackie.lol/posts/presenting-echo-chamber-a-python-tool-for-blacklivesmatter/

import os
import json
from twitter import OAuth, Twitter

# REPLACE THESE PLACEHOLDERS WITH YOUR TWITTER API KEYS
t = Twitter(
    auth=OAuth('token', 'token_secret', 'consumer_key', 'consumer_secret'))

# SET YOUR TWITTER USERNAME HERE
your_username = "******"

# SET YOUR SEARCH TERM HERE
HASHTAG = "#BlackLivesMatter"

# OUTPUT LISTS
did_tweet = list()
did_not_tweet = list()
could_not_send_dm = list()

# GET LIST OF PEOPLE YOU FOLLOW
# NOTE(review): shells out to the twitter-follow CLI with the username
# interpolated unquoted -- acceptable for a trusted local value, not for
# untrusted input.
os.system('twitter-follow -o -g ' + your_username + '> tw_following.txt')
followingList = list()
with open("tw_following.txt", "r") as myfile:
    for line in myfile:
        followingList.append(line.strip())

# GET LIST OF PEOPLE WHO FOLLOW YOU
os.system('twitter-follow -o -r ' + your_username + '> tw_followers.txt')
Пример #19
0
def update_featured_social():
    """
    Update featured tweets and Facebook posts.

    NOTE: Python 2 code (print statements, unicode()).  Reads the share
    URLs from project copytext, fetches each tweet / FB post via the
    respective APIs, rewrites tweet entities as tracked <a> links, and
    dumps everything to data/featured.json.
    """
    COPY = copytext.Copy(app_config.COPY_PATH)
    secrets = app_config.get_secrets()

    # Twitter
    print 'Fetching tweets...'

    twitter_api = Twitter(auth=OAuth(secrets['TWITTER_API_OAUTH_TOKEN'],
                                     secrets['TWITTER_API_OAUTH_SECRET'],
                                     secrets['TWITTER_API_CONSUMER_KEY'],
                                     secrets['TWITTER_API_CONSUMER_SECRET']))

    tweets = []

    # Up to three featured tweets, configured as featured_tweet1..3.
    for i in range(1, 4):
        tweet_url = COPY['share']['featured_tweet%i' % i]

        # Skip unset/blank copytext slots.
        if isinstance(tweet_url,
                      copytext.Error) or unicode(tweet_url).strip() == '':
            continue

        # The status id is the last path component of the tweet URL.
        tweet_id = unicode(tweet_url).split('/')[-1]

        tweet = twitter_api.statuses.show(id=tweet_id)

        creation_date = datetime.strptime(tweet['created_at'],
                                          '%a %b %d %H:%M:%S +0000 %Y')
        creation_date = '%s %i' % (creation_date.strftime('%b'),
                                   creation_date.day)

        tweet_url = 'https://twitter.com/%s/status/%s' % (
            tweet['user']['screen_name'], tweet['id'])

        photo = None
        html = tweet['text']
        subs = {}

        # Replace each media/url/hashtag entity span in the tweet text
        # with a GA-tracked anchor tag; remember the first photo.
        for media in tweet['entities'].get('media', []):
            original = tweet['text'][media['indices'][0]:media['indices'][1]]
            replacement = '<a href="%s" target="_blank" onclick="_gaq.push([\'_trackEvent\', \'%s\', \'featured-tweet-action\', \'link\', 0, \'%s\']);">%s</a>' % (
                media['url'], app_config.PROJECT_SLUG, tweet_url,
                media['display_url'])

            subs[original] = replacement

            if media['type'] == 'photo' and not photo:
                photo = {'url': media['media_url']}

        for url in tweet['entities'].get('urls', []):
            original = tweet['text'][url['indices'][0]:url['indices'][1]]
            replacement = '<a href="%s" target="_blank" onclick="_gaq.push([\'_trackEvent\', \'%s\', \'featured-tweet-action\', \'link\', 0, \'%s\']);">%s</a>' % (
                url['url'], app_config.PROJECT_SLUG, tweet_url,
                url['display_url'])

            subs[original] = replacement

        for hashtag in tweet['entities'].get('hashtags', []):
            original = tweet['text'][
                hashtag['indices'][0]:hashtag['indices'][1]]
            replacement = '<a href="https://twitter.com/hashtag/%s" target="_blank" onclick="_gaq.push([\'_trackEvent\', \'%s\', \'featured-tweet-action\', \'hashtag\', 0, \'%s\']);">%s</a>' % (
                hashtag['text'], app_config.PROJECT_SLUG, tweet_url,
                '#%s' % hashtag['text'])

            subs[original] = replacement

        for original, replacement in subs.items():
            html = html.replace(original, replacement)

        # https://dev.twitter.com/docs/api/1.1/get/statuses/show/%3Aid
        tweets.append({
            'id': tweet['id'],
            'url': tweet_url,
            'html': html,
            'favorite_count': tweet['favorite_count'],
            'retweet_count': tweet['retweet_count'],
            'user': {
                'id': tweet['user']['id'],
                'name': tweet['user']['name'],
                'screen_name': tweet['user']['screen_name'],
                'profile_image_url': tweet['user']['profile_image_url'],
                'url': tweet['user']['url'],
            },
            'creation_date': creation_date,
            'photo': photo
        })

    # Facebook
    print 'Fetching Facebook posts...'

    fb_api = GraphAPI(secrets['FACEBOOK_API_APP_TOKEN'])

    facebook_posts = []

    # Up to three featured posts, configured as featured_facebook1..3.
    for i in range(1, 4):
        fb_url = COPY['share']['featured_facebook%i' % i]

        if isinstance(fb_url, copytext.Error) or unicode(fb_url).strip() == '':
            continue

        fb_id = unicode(fb_url).split('/')[-1]

        post = fb_api.get_object(fb_id)
        user = fb_api.get_object(post['from']['id'])
        user_picture = fb_api.get_object('%s/picture' % post['from']['id'])
        likes = fb_api.get_object('%s/likes' % fb_id, summary='true')
        comments = fb_api.get_object('%s/comments' % fb_id, summary='true')
        #shares = fb_api.get_object('%s/sharedposts' % fb_id)

        creation_date = datetime.strptime(post['created_time'],
                                          '%Y-%m-%dT%H:%M:%S+0000')
        creation_date = '%s %i' % (creation_date.strftime('%b'),
                                   creation_date.day)

        # https://developers.facebook.com/docs/graph-api/reference/v2.0/post
        # NOTE(review): most keys below ('message', 'link', 'name', ...)
        # are accessed unguarded; a post missing one of them raises
        # KeyError here -- confirm the featured posts always carry them.
        facebook_posts.append({
            'id': post['id'],
            'message': post['message'],
            'link': {
                'url': post['link'],
                'name': post['name'],
                'caption': (post['caption'] if 'caption' in post else None),
                'description': post['description'],
                'picture': post['picture']
            },
            'from': {
                'name': user['name'],
                'link': user['link'],
                'picture': user_picture['url']
            },
            'likes': likes['summary']['total_count'],
            'comments': comments['summary']['total_count'],
            #'shares': shares['summary']['total_count'],
            'creation_date': creation_date
        })

    # Render to JSON
    output = {'tweets': tweets, 'facebook_posts': facebook_posts}

    with open('data/featured.json', 'w') as f:
        json.dump(output, f)
Пример #20
0
def wanted_generator(settings: 'Settings', ext_logger: OptionalLogger,
                     attrs: QuerySet):
    own_settings = settings.providers[constants.provider_name]

    def process_wani_tweets(current_tweets: List[Dict[str, Any]],
                            local_logger=None):
        publisher = 'wanimagazine'
        source = 'twitter'

        for tweet in current_tweets:

            cover_url = None
            if 'media' in tweet['entities']:
                for media in tweet['entities']['media']:
                    cover_url = media['media_url']

            tweet_obj, tweet_created = TweetPost.objects.get_or_create(
                tweet_id=tweet['id'],
                defaults={
                    'text':
                    tweet['text'],
                    'user':
                    publisher,
                    'posted_date':
                    datetime.strptime(tweet['created_at'],
                                      "%a %b %d %H:%M:%S %z %Y"),
                    'media_url':
                    cover_url
                })

            if not tweet_created:
                continue

            local_logger.info("Created tweet id: {}processing.".format(
                tweet_obj.tweet_id))

            match_tweet_type = re.search('【(.+)】(.*)', tweet['text'],
                                         re.DOTALL)
            if match_tweet_type:
                local_logger.info(
                    "Matched pattern (date_type: {}, artist: {}),".format(
                        match_tweet_type.group(1), match_tweet_type.group(2)))
                release_type = None
                release_date = None
                date_type = re.search(r'.*?(\d+)/(\d+).*?',
                                      match_tweet_type.group(1), re.DOTALL)
                announce_date = datetime.strptime(tweet['created_at'],
                                                  "%a %b %d %H:%M:%S %z %Y")
                if date_type:
                    release_type = 'release_date'
                    release_date = announce_date.replace(
                        month=int(date_type.group(1)),
                        day=int(date_type.group(2)),
                        hour=0,
                        minute=0,
                        second=0)
                new_book_type = re.search('新刊情報', match_tweet_type.group(1),
                                          re.DOTALL)
                if new_book_type:
                    release_type = 'new_publication'
                    release_date = datetime.strptime(
                        tweet['created_at'], "%a %b %d %H:%M:%S %z %Y")
                out_today_type = re.search('本日発売', match_tweet_type.group(1),
                                           re.DOTALL)
                if out_today_type:
                    release_type = 'out_today'
                    release_date = datetime.strptime(
                        tweet['created_at'], "%a %b %d %H:%M:%S %z %Y")
                out_tomorrow_type = re.search('明日発売',
                                              match_tweet_type.group(1),
                                              re.DOTALL)
                if out_tomorrow_type:
                    release_type = 'out_tomorrow'
                    release_date = datetime.strptime(
                        tweet['created_at'],
                        "%a %b %d %H:%M:%S %z %Y") + timedelta(days=1)

                match_title_artists = re.search('^『(.+?)』は<(.+)>',
                                                match_tweet_type.group(2),
                                                re.DOTALL)
                if match_title_artists and release_type:

                    local_logger.info(
                        "Matched pattern (title: {}, artists: {}), release_type: {}."
                        .format(match_title_artists.group(1),
                                match_title_artists.group(2), release_type))

                    title = match_title_artists.group(1)
                    title = title.replace("X-EROS#", "X-EROS #")
                    artists = set(
                        match_title_artists.group(2).replace('ほか',
                                                             '').split('/'))
                    if len(artists) > 1:
                        book_type = 'magazine'
                    else:
                        book_type = ''
                    wanted_gallery, created = WantedGallery.objects.get_or_create(
                        title_jpn=title,
                        search_title=format_title_to_wanted_search(title),
                        publisher=publisher,
                        defaults={
                            'title': title,
                            'book_type': book_type,
                            'add_as_hidden': True,
                            'category': 'Manga',
                            'reason': 'wanimagazine',
                            'public': own_settings.add_as_public
                        })
                    if created:
                        wanted_gallery.should_search = True
                        wanted_gallery.keep_searching = True
                        wanted_gallery.save()
                        local_logger.info(
                            "Created wanted gallery (magazine): {}, search title: {}"
                            .format(wanted_gallery.get_absolute_url(), title))
                    announce, announce_created = wanted_gallery.announces.get_or_create(
                        announce_date=announce_date,
                        release_date=release_date,
                        type=release_type,
                        source=source,
                    )
                    if announce_created and cover_url:
                        announce.save_img(cover_url)
                        # wanted_gallery.calculate_nearest_release_date()
                        wanted_gallery.release_date = release_date
                        wanted_gallery.save()

                    for artist in artists:
                        artist_obj = Artist.objects.filter(
                            name_jpn=artist).first()
                        if not artist_obj:
                            artist_obj = Artist.objects.create(name=artist,
                                                               name_jpn=artist)
                        wanted_gallery.artists.add(artist_obj)

                match_artist_title = re.search('^(.+?)『(.+?)』.*',
                                               match_tweet_type.group(2),
                                               re.DOTALL)
                if match_artist_title and release_type:

                    local_logger.info(
                        "Matched pattern (artist: {}, title: {}), release type: {}."
                        .format(match_artist_title.group(1),
                                match_artist_title.group(2), release_type))

                    artist = match_artist_title.group(1)
                    title = match_artist_title.group(2)
                    title = title.replace("X-EROS#", "X-EROS #")
                    cover_artist = None
                    book_type = None
                    if '最新刊' in artist:
                        artist = artist.replace('最新刊', '')
                        book_type = 'new_publication'
                        cover_artist = Artist.objects.filter(
                            name_jpn=artist).first()
                        if not cover_artist:
                            cover_artist = Artist.objects.create(
                                name=artist, name_jpn=artist)
                    elif '初単行本' in artist and ('『' not in artist
                                               and '』' not in artist):
                        artist = artist.replace('初単行本', '')
                        book_type = 'first_book'
                        cover_artist = Artist.objects.filter(
                            name_jpn=artist).first()
                        if not cover_artist:
                            cover_artist = Artist.objects.create(
                                name=artist, name_jpn=artist)
                    elif '表紙が目印の' in artist:
                        artist = artist.replace('表紙が目印の', '')
                        book_type = "magazine"
                        cover_artist = Artist.objects.filter(
                            name_jpn=artist).first()
                        if not cover_artist:
                            cover_artist = Artist.objects.create(
                                name=artist, name_jpn=artist)
                    if book_type:
                        wanted_gallery, created = WantedGallery.objects.update_or_create(
                            title_jpn=title,
                            search_title=format_title_to_wanted_search(title),
                            publisher=publisher,
                            defaults={
                                'cover_artist': cover_artist,
                                'title': title,
                                'book_type': book_type,
                                'add_as_hidden': True,
                                'category': 'Manga',
                                'reason': 'wanimagazine',
                                'public': own_settings.add_as_public
                            })
                        if created:
                            wanted_gallery.should_search = True
                            wanted_gallery.keep_searching = True
                            wanted_gallery.save()
                            local_logger.info(
                                "Created wanted gallery (anthology): {}, search title: {}"
                                .format(wanted_gallery.get_absolute_url(),
                                        title))
                        announce, announce_created = wanted_gallery.announces.get_or_create(
                            announce_date=announce_date,
                            release_date=release_date,
                            type=release_type,
                            source=source,
                        )
                        if announce_created and cover_url:
                            announce.save_img(cover_url)
                            # wanted_gallery.calculate_nearest_release_date()
                            wanted_gallery.release_date = release_date
                            wanted_gallery.save()

                        artist_obj = Artist.objects.filter(
                            name_jpn=artist).first()
                        if not artist_obj:
                            artist_obj = Artist.objects.create(name=artist,
                                                               name_jpn=artist)
                        wanted_gallery.artists.add(artist_obj)
            else:
                local_logger.info(
                    "Created tweet id: {} did not match the pattern".format(
                        tweet_obj.tweet_id))

    # Abort early unless every Twitter credential is configured.
    if not all([getattr(own_settings, x) for x in CREDENTIALS]):
        ext_logger.error(
            'Cannot work with Twitter unless all credentials are set.')
        return

    # Authenticated REST client built from the stored credentials.
    t = Twitter(auth=OAuth(
        own_settings.token,
        own_settings.token_secret,
        own_settings.consumer_key,
        own_settings.consumer_secret,
    ))
    tweet_posts = TweetPost.objects.all()
    if tweet_posts:
        # Tweets already stored: page FORWARD from the newest stored id
        # (since_id) until the timeline returns nothing new.
        max_id = tweet_posts.aggregate(Max('tweet_id'))['tweet_id__max']
        while True:
            ext_logger.info("Fetching since tweet id: {}".format(max_id))
            tweets = t.statuses.user_timeline(screen_name='wanimagazine',
                                              include_rts=False,
                                              exclude_replies=True,
                                              trim_user=True,
                                              count=200,
                                              since_id=max_id)
            if not tweets:
                ext_logger.info("No more tweets to fetch, ending")
                break
            new_max_id = max(tweets, key=lambda x: x['id'])['id']
            process_wani_tweets(tweets, local_logger=ext_logger)
            if new_max_id == max_id:
                # The API returned only tweets we already processed.
                ext_logger.info(
                    "No more new tweets fetched, stopping at: {}".format(
                        max_id))
                break
            else:
                max_id = new_max_id
    else:
        # First run (nothing stored): page BACKWARD through the whole
        # timeline via max_id until it is exhausted.
        min_id = None
        while True:
            if min_id:
                ext_logger.info(
                    "Fetching backwards with max id: {}".format(min_id))
                tweets = t.statuses.user_timeline(screen_name='wanimagazine',
                                                  include_rts=False,
                                                  exclude_replies=True,
                                                  trim_user=True,
                                                  count=200,
                                                  max_id=min_id)
            else:
                ext_logger.info("Starting from newer tweet.")
                tweets = t.statuses.user_timeline(screen_name='wanimagazine',
                                                  include_rts=False,
                                                  exclude_replies=True,
                                                  trim_user=True,
                                                  count=200)
            if not tweets:
                ext_logger.info("No more tweets to fetch, ending")
                break
            new_min_id = min(tweets, key=lambda x: x['id'])['id']
            process_wani_tweets(tweets, local_logger=ext_logger)
            if new_min_id == min_id:
                ext_logger.info(
                    "No more new tweets fetched, stopping at: {}".format(
                        min_id))
                break
            else:
                min_id = new_min_id
Пример #21
0
    def __init__(self):
        """Build OAuth credentials and the Twitter search client.

        Credentials are read from the module-level ``Twitter_*`` constants.
        """
        self.aouth = OAuth(
            Twitter_Access_Token,
            Twitter_Access_TokenSecret,
            Twitter_Consumer_Key,
            Twitter_Consumer_Secret,
        )
        self.twitter_search = Twitter(auth=self.aouth)
Пример #22
0
# encoding: utf-8
from __future__ import unicode_literals

from random import choice
import time
import pickle
import json

from twitter import Twitter, NoAuth, OAuth, read_token_file, TwitterHTTPError
from twitter.api import TwitterDictResponse, TwitterListResponse
from twitter.cmdline import CONSUMER_KEY, CONSUMER_SECRET

# Two credential objects: an anonymous one and a full OAuth one built from
# the app keys in twitter.cmdline plus the user tokens on disk.
# NOTE(review): assumes 'tests/oauth_creds' exists relative to the CWD.
noauth = NoAuth()
oauth = OAuth(*read_token_file('tests/oauth_creds')
              + (CONSUMER_KEY, CONSUMER_SECRET))

# API v1.1 client authenticated with the OAuth credentials.
twitter11 = Twitter(domain='api.twitter.com',
                    auth=oauth,
                    api_version='1.1')

# API v1.1 client with no authentication, for testing anonymous access.
twitter11_na = Twitter(domain='api.twitter.com',
                       auth=noauth,
                       api_version='1.1')

# Alphabet for random test strings: ASCII letters and digits.
AZaz = "abcdefghijklmnopqrstuvwxyz1234567890ABCDEFGHIJKLMNOPQRSTUVWXYZ"


def get_random_str(length=10):
    """Return a random alphanumeric string of *length* ASCII characters.

    Characters are drawn uniformly from ASCII letters and digits — the same
    character set as the module-level ``AZaz`` alphabet.  ``length``
    defaults to 10 to stay backward compatible with the original
    fixed-size version.
    """
    import string  # local import keeps the helper self-contained
    alphabet = string.ascii_letters + string.digits
    return ''.join(choice(alphabet) for _ in range(length))

# Log everything (DEBUG and up) to the file named in env['LOG_FILE'].
logging.basicConfig(filename=env['LOG_FILE'],level=logging.DEBUG)

# paho-mqtt publish callback.
def on_publish(client, userdata, mid):
    """Log the broker-assigned message id of each completed publish.

    Signature matches paho-mqtt's ``on_publish(client, userdata, mid)``
    callback contract; the return value is unused.
    """
    # Lazy %-style args defer string building until the record is emitted,
    # replacing the eager "mid: " + str(mid) concatenation.
    logging.info("mid: %s", mid)


# MQTT client publishing to a local broker.
# NOTE(review): port 8833 is unusual (the MQTT default is 1883) — confirm
# the broker really listens there.
client = paho.Client()
client.on_publish = on_publish
client.connect('localhost', 8833)


# Twitter streaming client filtered on the configured hashtags.
stream = TwitterStream(
    auth=OAuth(
        env['TW_ACCESS_TOKEN'],
        env['TW_ACCESS_SECRET'],
        env['TW_CONSUMER_KEY'],
        env['TW_CONSUMER_SECRET']
    )
)
tweets = stream.statuses.filter(track=env['HASH_TAGS'])

# Forward each matching tweet's text to the MQTT topic, skipping retweets.
# NOTE(review): the 'RT' substring test also skips any tweet whose text
# merely contains the letters "RT"; startswith('RT @') would be stricter.
for tweet in tweets:
    if 'RT' not in tweet['text']:
        logging.debug(tweet['text'])
        msg_info = client.publish(env['TW_STREAM_TOPIC'], tweet['text'])

        if not msg_info.is_published():
            logging.error('Message is not yet published.')
Пример #24
0
from watson_developer_cloud import NaturalLanguageUnderstandingV1
from watson_developer_cloud.natural_language_understanding_v1 import Features, EntitiesOptions, SentimentOptions
from kafka import KafkaProducer
from random import randrange, uniform
import boto3

# Kafka producer that JSON-encodes each message as ASCII bytes.
producer = KafkaProducer(
    value_serializer=lambda m: json.dumps(m).encode('ascii'),
    bootstrap_servers=config.KAFKA_SERVER)

# Watson NLU client used for tweet-text analysis.
natural_language_understanding = NaturalLanguageUnderstandingV1(
    version=config.WATSON_VERSION,
    username=config.WATSON_USERNAME,
    password=config.WATSON_PASSWORD)
# Google Maps client plus Twitter streaming client from config credentials.
gmaps = googlemaps.Client(key=config.GOOGLE_API_KEY)
oauth = OAuth(config.ACCESS_TOKEN, config.ACCESS_SECRET, config.CONSUMER_KEY,
              config.CONSUMER_SECRET)
twitter_stream = TwitterStream(auth=oauth)

# AWS SNS client for publishing notifications.
sns = boto3.client('sns', region_name='us-east-1')


def handler(event, context):
    iterator = twitter_stream.statuses.filter(track=event['keyword'])
    count = 10
    result = []
    for tweet in iterator:
        try:
            text = tweet['text']
        except Exception as e:
            continue
        if tweet['lang'] == 'en':
# Directory where raw tweet dumps are written.
RAW_TWEET_DIR = 'raw_tweet'

# Create the raw_tweet dir if it does not exist yet.
if not os.path.exists(RAW_TWEET_DIR):
    os.makedirs(RAW_TWEET_DIR)

# Load the Twitter dev API credentials by executing the config script
# into a plain dict.
# NOTE(review): execfile() exists only in Python 2; under Python 3 this
# would need exec(open(path).read(), config) instead.
config = {}
execfile(
    "/home/mike/Documents/repoo/information_diffusion/twitter_api/config/config.py",
    config)

# Create the authenticated Twitter API object.
api = Twitter(auth=OAuth(config["ACCESS_TOKEN"], config["ACCESS_TOKEN_SECRET"],
                         config["CONSUMER_KEY"], config["CONSUMER_SECRET"]))


def datetime_filename(prefix='output_'):
    """Return *prefix* followed by the current UTC timestamp and 'utc.txt'.

    Example result: ``output_20240101123000utc.txt``.
    """
    now_utc = datetime.datetime.utcnow()
    return '{0}{1:%Y%m%d%H%M%S}utc.txt'.format(prefix, now_utc)


def scrape(tweets_per_file=10000):
    f = open(datetime_filename(prefix='{0}/en_tweet_'.format(RAW_TWEET_DIR)),
             'w')
    tweet_count = 0
    try:
        for line in api.GetStreamSample():
            if 'text' in line and line['lang'] == u'en':
Пример #26
0
 def __init__(self):
     """Initialise thread tracking, OAuth credentials and the ES client."""
     self.__current_thread_ident = None
     self.__oauth = OAuth(**OAUTH_INFO)
     # Connect to the configured node rather than the localhost default.
     self.__es = Elasticsearch([ELASTICSEARCH_IP])
Пример #27
0
    import json
except ImportError:
    import simplejson as json

import os

# Import the necessary methods from the "twitter" library.
from twitter import Twitter, OAuth, TwitterHTTPError, TwitterStream

# User credentials to access the Twitter API.
# NOTE(review): secrets are hard-coded in source; they should be loaded
# from the environment or a config file and rotated.
ACCESS_TOKEN = '918639046280253440-UJ5I3x4Ru0MhBLyVxyPefNXYq9c7KJg'
ACCESS_SECRET = 'Kj8XelBrq7hS8oR5H0eqfyjJzFNJb4y9pP1pkSOdZnp8R'
CONSUMER_KEY = 'bMdrEm9OEHKYeenT6OXVbsoo7'
CONSUMER_SECRET = 'P6pbJA9MEa2VpDBHvEJ3BBp1XJla66mTOlgdGpMOzPqa8LSsc1'

oauth = OAuth(ACCESS_TOKEN, ACCESS_SECRET, CONSUMER_KEY, CONSUMER_SECRET)

# Initiate the connection to the Twitter Streaming API.
twitter_stream = TwitterStream(auth=oauth)

# Get a sample of the public data flowing through Twitter.
# iterator = twitter_stream.statuses.filter(track="", language="en")
iterator = twitter_stream.statuses.sample()

# Stop after collecting this many tweets.
# You don't have to set a limit, but can continue running
# the Twitter API to collect data for days or even longer.
tweet_count = 1000

sample_file = open('twitter_stream_samples3.txt', 'w')
import pandas as pd
from twitter import Twitter
from twitter import OAuth
import re

import json
from pandas.io.json import json_normalize

# Twitter API credentials.
# NOTE(review): secrets are hard-coded in source; load them from the
# environment or a config file instead.
ck = 'QvGBrXMEZhicazT2XzvK1usgY' #consumer key
cs = 'TyYiFfvYszrjtoikhn5dlMYWljuEPGQu3iUlPZmByZAgKwTgP2' #consumer key secret
at = '954075809018470400-2TRhibAbIA7OBhPUXuf6xE8NIMTWa17'  # access token
ats = 'qbg2ibQ4QILAIYidy9kdANZ5cxSO2a9jCPhOIYmsGm1Wv'  # access token secret

oauth = OAuth(at,ats,ck,cs)

api = Twitter(auth=oauth)

# Accumulator for fetched tweets and the max_id pagination cursor.
df = pd.DataFrame()
mid = 0
for i in range(10):
    if i == 0:
        search_tw = api.search.tweets(q="from:sreekanth324", count=100, tweet_mode='extended')
    else:
        search_tw = api.search.tweets(q="from:sreekanth324", count=100, max_id=mid, tweet_mode='extended')

    dftemp = json_normalize(search_tw, 'statuses')
    #     mid = dftemp1['id'].min()
    #     mid=mid-1
    for j in range(0, len(dftemp.index)):
        if dftemp['id'][j] != None:
            if mid == 0:
Пример #29
0
    if config.LANGUAGE:
        WORDLIST = wordlist(config.LANGUAGE)
except ImportError:
    WORDLIST = [
        ['The space'],
        ['is'],
        ['open'],
        ['closed'],
        [''],
        ['']
    ]

# Build the Twitter client from the config credentials; abort on failure.
try:
    twitter = Twitter(auth=OAuth(
                            config.OAUTH_TOKEN,
                            config.OAUTH_SECRET,
                            config.CONSUMER_KEY,
                            config.CONSUMER_SECRET))
except Exception as e:
    # str(e) is required here: concatenating str and an Exception instance
    # raises TypeError, which would mask the real initialisation error.
    print('Error in twitter init: ' + str(e))
    exit(255)


def write_status(status):
    """Persist *status* to ``config.STATUS_FILE`` as ``{"status": ...}``.

    Uses a ``with`` block so the file handle is closed (and the data
    flushed to disk) even if the write raises; the original version
    leaked the handle.
    """
    with open(config.STATUS_FILE, 'w+') as status_file:
        status_file.write(json.dumps({'status': status}))


def generate_phrase(open_status=True):
    phrase = choice(WORDLIST[0]) + " "
    phrase += choice(WORDLIST[1]) + " "
Пример #30
0
    def bot_setup(self, config_file="config.txt"):
        """Load ``config_file`` into ``BOT_CONFIG``, validate required keys,
        ensure the sync files exist, and open the Twitter connection.

        Raises ``Exception`` listing any missing required parameters.
        """
        # Each config line is "PARAMETER:VALUE"; values are coerced per key.
        with open(config_file, "r") as in_file:
            for line in in_file:
                line = line.split(":")
                parameter = line[0].strip()
                value = line[1].strip()

                if parameter in [
                        "USERS_KEEP_FOLLOWING", "USERS_KEEP_UNMUTED",
                        "USERS_KEEP_MUTED"
                ]:
                    # Comma-separated user-id lists become sets of ints.
                    if value != "":
                        self.BOT_CONFIG[parameter] = set(
                            [int(x) for x in value.split(",")])
                    else:
                        self.BOT_CONFIG[parameter] = set()
                elif parameter in [
                        "FOLLOW_BACKOFF_MIN_SECONDS",
                        "FOLLOW_BACKOFF_MAX_SECONDS"
                ]:
                    self.BOT_CONFIG[parameter] = int(value)
                else:
                    self.BOT_CONFIG[parameter] = value

        # Credentials and bookkeeping files that must be present.
        required_parameters = [
            "OAUTH_TOKEN", "OAUTH_SECRET", "CONSUMER_KEY", "CONSUMER_SECRET",
            "TWITTER_HANDLE", "ALREADY_FOLLOWED_FILE", "FOLLOWERS_FILE",
            "FOLLOWS_FILE"
        ]

        missing_parameters = []

        for required_parameter in required_parameters:
            if (required_parameter not in self.BOT_CONFIG
                    or self.BOT_CONFIG[required_parameter] == ""):
                missing_parameters.append(required_parameter)

        if len(missing_parameters) > 0:
            # Reset config so a half-configured bot cannot run.
            self.BOT_CONFIG = {}
            raise Exception(
                "Please edit %s to include the following parameters: %s.\n\n"
                "The bot cannot run unless these parameters are specified." %
                (config_file, ", ".join(missing_parameters)))

        # Create any missing sync files so later reads do not fail.
        for sync_file in [
                self.BOT_CONFIG["ALREADY_FOLLOWED_FILE"],
                self.BOT_CONFIG["FOLLOWS_FILE"],
                self.BOT_CONFIG["FOLLOWERS_FILE"]
        ]:
            if not os.path.isfile(sync_file):
                with open(sync_file, "w") as out_file:
                    out_file.write("")

        # Warn when the follows/followers snapshots are older than 24 hours.
        if (time.time() - os.path.getmtime(self.BOT_CONFIG["FOLLOWS_FILE"]) >
                86400 or time.time() -
                os.path.getmtime(self.BOT_CONFIG["FOLLOWERS_FILE"]) > 86400):
            print(
                "Warning: Your Twitter follower sync files are more than a day old. "
                "It is highly recommended that you sync them by calling sync_follows() "
                "before continuing.",
                file=sys.stderr)

        self.TWITTER_CONNECTION = Twitter(auth=OAuth(
            self.BOT_CONFIG["OAUTH_TOKEN"], self.BOT_CONFIG["OAUTH_SECRET"],
            self.BOT_CONFIG["CONSUMER_KEY"],
            self.BOT_CONFIG["CONSUMER_SECRET"]))