Exemplo n.º 1
0
    def __init__(self,
                 appMode=None,
                 ckey=None,
                 csecret=None,
                 atoken=None,
                 asecret=None):
        """Create the tweepy client stored on this instance.

        :param appMode: truthy selects ``AppAuthHandler`` (consumer key and
            secret only); falsy selects ``OAuthHandler`` plus an access token.
        :param ckey: consumer key; ``None`` selects the built-in default.
        :param csecret: consumer secret; ``None`` selects the built-in default.
        :param atoken: access token; ``None`` selects the built-in default.
            Only used when ``appMode`` is falsy.
        :param asecret: access secret; ``None`` selects the built-in default.
            Only used when ``appMode`` is falsy.
        """
        # NOTE(review): the fallback credentials below are hard-coded in source.
        # TODO: should move to database for configurable
        if ckey is None:
            ckey = "uR8U5VlNwZiPdVx90hg2vpA8s"
        if csecret is None:
            csecret = "xqPthljYXU18LSggR8kqtGlyUwjIcsfRknCXpXLGtVh6XKJI2M"
        if atoken is None:
            atoken = "942670843783663616-6gLK5Uhdwnn8TxVTTVlMybfDUJCpOi5"
        if asecret is None:
            asecret = "gw3Wez40PtSk8ykXMxUq4dzXiZSEdEeLVBJ4xFHTyShII"

        # Pick the handler: app mode needs only the consumer pair, the other
        # mode additionally attaches the access token.
        if appMode:
            handler = AppAuthHandler(ckey, csecret)
        else:
            handler = OAuthHandler(ckey, csecret)
            handler.set_access_token(atoken, asecret)

        self.__api = API(handler,
                         wait_on_rate_limit=True,
                         wait_on_rate_limit_notify=True)
        # NOTE(review): this only builds a super() proxy object; no parent
        # initializer is invoked here.
        super()
Exemplo n.º 2
0
    def get_conn(self):
        """
        Return the tweepy auth handler for Twitter, creating it on first use.

        The handler is cached on ``self.t``; when ``self.t`` is already truthy
        it is returned unchanged. Credentials are read from the
        ``TWITTER_*`` environment variables.

        :return: an ``OAuthHandler`` (user context) when ``self.auth_type`` is
            1, an ``AppAuthHandler`` (app context) when it is 2.
        :raises ValueError: if ``self.auth_type`` is neither 1 nor 2.
        """
        if self.t:
            return self.t

        if self.auth_type == 1:
            # connect to Twitter with OAuth1 (user context)
            auth = OAuthHandler(os.getenv('TWITTER_CONSUMER_API_KEY'),
                                os.getenv('TWITTER_CONSUMER_API_SECRET_KEY'))
            auth.set_access_token(os.getenv('TWITTER_ACCESS_TOKEN'),
                                  os.getenv('TWITTER_ACCESS_TOKEN_SECRET'))
        elif self.auth_type == 2:
            # connect to Twitter with OAuth2 (app context)
            auth = AppAuthHandler(os.getenv('TWITTER_CONSUMER_API_KEY'),
                                  os.getenv('TWITTER_CONSUMER_API_SECRET_KEY'))
        else:
            # Interpolate here: exceptions do not apply %-formatting to their
            # arguments the way logging calls do. %r also copes with a
            # non-integer auth_type (e.g. None).
            raise ValueError("Invalid auth_type %r. Valid AuthTypes are: 1,2"
                             % (self.auth_type,))

        self.t = auth

        return self.t
Exemplo n.º 3
0
 def __init__(self, config_obj):
     """Initialise the parent with ``config_obj`` and build ``self.client``.

     The client is a tweepy ``API`` using ``AppAuthHandler`` with the
     consumer key and secret read from ``self.config``.
     """
     super().__init__(config_obj)
     consumer_key = self.config.TWITTER_CONSUMER_KEY
     consumer_secret = self.config.TWITTER_CONSUMER_SECRET
     app_auth = AppAuthHandler(consumer_key, consumer_secret)
     self.client = API(app_auth)
def get_auth(tipo):
    """Return the tweepy auth handler matching ``tipo``.

    ``'stream'`` yields an ``OAuthHandler`` with the access token set; any
    other value yields an ``AppAuthHandler``. Keys and secrets come from
    module-level names.
    """
    if tipo != 'stream':
        return AppAuthHandler(consumer_key, consumer_secret)

    stream_auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
    stream_auth.set_access_token(access_key, access_secret)
    return stream_auth
Exemplo n.º 5
0
def main():
    logging.basicConfig(filename='road_harvest.log',
                        filemode='w',
                        level=logging.DEBUG)

    # access database
    couch = couchdb.Server(settings.database_address)
    try:
        db = couch.create(settings.database)
    except couchdb.http.PreconditionFailed as e:
        db = couch[settings.database]

    # application-only authentication
    auth = AppAuthHandler(settings.consumer_key, settings.consumer_secret)
    api = tweepy.API(auth,
                     wait_on_rate_limit=True,
                     wait_on_rate_limit_notify=True)

    # start twitter harvester
    print 'start'
    logging.info('start harvesting')
    while True:
        with open(settings.csv) as csvfile:
            reader = csv.DictReader(csvfile)
            for row in reader:
                search(row, db, api)
Exemplo n.º 6
0
def main():
    """Start one harvester thread per configured application key pair.

    Each thread receives its own API client, the shared database handle and
    the ``(start, end)`` boundary computed by ``calc_boundary``.
    """
    # Open the database: try to create it first, fall back to the existing
    # one when creation is rejected with PreconditionFailed.
    server = couchdb.Server(settings.database_address)
    try:
        db = server.create(settings.database)
    except couchdb.http.PreconditionFailed:
        db = server[settings.database]

    # One application-only client per key pair in the settings.
    apis = []
    for app in settings.apps:
        handler = AppAuthHandler(app['consumer_key'], app['consumer_secret'])
        apis.append(tweepy.API(handler,
                               wait_on_rate_limit=True,
                               wait_on_rate_limit_notify=True))

    rows_total = row_count()
    apis_total = len(apis)

    # Create and start the threads; the module attribute road_harvest.thread
    # is rebound on every iteration, so it ends up naming the last thread.
    for index, api in enumerate(apis):
        start, end = calc_boundary(index, rows_total, apis_total)
        road_harvest.thread = road_harvest.Harvest(api, db, start, end)
        road_harvest.thread.start()
Exemplo n.º 7
0
    def on_data(self, data):
        """Handle one raw stream message and store a summary of it.

        For a tweet payload, the author's timeline is scanned for earlier
        "flash mob" tweets, their retweets are counted, and a document with
        the tweet text and both counters is inserted into ``posts``.

        :param data: raw JSON text of one stream message.
        :return: always ``True``; messages that cannot be parsed or are not
            tweets are skipped instead of raising.
        """
        print("Found something")
        try:
            decoded = json.loads(data)
        except (TypeError, ValueError):
            # json.JSONDecodeError is a ValueError subclass. Without this
            # early return the code below would hit an unbound `decoded`.
            print("JSON couldn't be loaded")
            return True

        try:
            user_id = decoded['user']['id']
            # Encode/decode round trip with errors='ignore' drops characters
            # that cannot be represented in UTF-8.
            tweet_text = decoded['text'].encode("utf-8", errors='ignore')
        except (KeyError, TypeError, AttributeError):
            # Not a tweet payload (no user/text fields); nothing to store.
            return True

        # Number of times the user has used the search criteria in the past,
        # and how often those tweets were retweeted.
        tweet_count = 0
        retweet_count = 0

        try:
            auth = AppAuthHandler(consumer_key, consumer_secret)
            auth.apply_auth()
            api = API(auth)

            users_tweets = api.user_timeline(id=user_id, count="500")
            for users_tweet in users_tweets:
                if re.search(r'flash[ ]?mob[s]?', users_tweet.text, flags=re.IGNORECASE):
                    tweet_count += 1
                    # Check how many times the past tweets have been retweeted
                    retweet_count += len(api.retweets(id=users_tweet.id))

            # NOTE(review): the follower count is fetched but not stored; the
            # request is kept because a failure here skips the insert below.
            followers_count = len(api.followers_ids(id=user_id))

        except Exception:
            # Best effort: skip this tweet, but let KeyboardInterrupt and
            # SystemExit propagate (a bare except swallowed them).
            print("Something went wrong when gathering more data")
            return True

        print("User ID:       ", user_id)
        print("Tweet count:   ", tweet_count)
        print("retweet_count: ", retweet_count)
        print()

        post = {"user_id": user_id,
                "tweet": tweet_text.decode(encoding='utf-8', errors='ignore'),
                "tweet_count": tweet_count,
                "retweet_count": retweet_count}

        posts.insert(post)

        return True
Exemplo n.º 8
0
def collect_tweets():
    """Collect tweets by keyword, through paged search or the stream filter.

    Command line, read from ``sys.argv``:

    * ``sys.argv[1]`` -- ``s``/``search`` pages backwards through search
      results and appends the non-retweet texts, one per line, to
      ``results_1.csv``; ``f``/``filter`` starts the stream filter instead.
    * ``sys.argv[2:]`` -- optional keywords replacing the built-in list.

    Search mode stops after ``maxtweets`` results or when a page comes back
    empty.
    """
    # The listener and the auth handler are shared by stream and search client.
    l = StdOutListener()
    auth = AppAuthHandler(consumer_key, consumer_secret)
    #auth.set_access_token(access_token, access_token_secret)
    stream = Stream(auth, l)
    api = API(auth, wait_on_rate_limit=True, wait_on_rate_limit_notify=True)

    keywords = ['#phoneless', '#no-phone', '#nophone', 'phoneless', 'nophone','"lost communication"','"no communication"','"no internet"','"no signal"','"nomophobia"','"phubbing"', '"mobile addiction"', '"Ringxiety"','"textaphrenia"', '"Phantom Ringing"','"Phantom Vibration"','Communifaking']
    maxtweets = 100000
    tweetsperQry = 100
    max_id = -1
    tweetCount = 0

    # Keywords given on the command line replace the built-in list.
    keywords_f = sys.argv[2:] or keywords
    keywords_s = " OR ".join(keywords_f)

    mode = sys.argv[1]
    if mode in ('s', 'search'):
        data = api.rate_limit_status()
        print("limit", data['resources']['search'])
        # max_id is still -1 here, so the file name is results_1.csv.
        with open("results_{}.csv".format(abs(max_id)), "a") as fd:
            while tweetCount < maxtweets:
                try:
                    if max_id <= 0:
                        # First page: no upper bound on the tweet id.
                        search_results = api.search(q=keywords_s, tweet_mode="extended", lang="en", count=tweetsperQry)
                    else:
                        # Later pages: only tweets older than the last one seen.
                        search_results = api.search(q=keywords_s, tweet_mode="extended", lang="en", count=tweetsperQry, max_id=str(max_id - 1))

                    if not search_results:
                        # An empty page means there is nothing older left.
                        # Previously search_results[-1] raised IndexError
                        # here, which was swallowed and retried forever.
                        print("no more results; stopped at {}".format(max_id))
                        break

                    # Keep original tweets only and flatten each to one line.
                    lines = [
                        tweet.full_text.replace("\r\n", "").replace("\n", "") + "\n"
                        for tweet in search_results
                        if (not tweet.retweeted) and ('RT @' not in tweet.full_text)
                    ]
                    tweetCount += len(search_results)
                    max_id = search_results[-1].id
                    fd.write("".join(lines))
                except KeyboardInterrupt:
                    print("stopped at {}".format(max_id))
                    sys.exit()
                except Exception as e:
                    # Best effort: report the error and retry the same page.
                    print(e)
                    print("stopped at {}".format(max_id))

    elif mode in ('f', 'filter'):
        # Stream tweets in English that match any of the keywords.
        stream.filter(languages=["en"], track=keywords_f)
def create_twitter_object():
	"""Return a tweepy ``API`` client built on ``AppAuthHandler``.

	The client is configured to wait when the rate limit is reached and to
	print a notification while it waits.
	"""
	app_auth = AppAuthHandler(CONSUMER_KEY, CONSUMER_API_SECRET)
	return API(app_auth, wait_on_rate_limit=True, wait_on_rate_limit_notify=True)
Exemplo n.º 10
0
def search_auth():
    """Return a tweepy ``API`` client for searching.

    Uses ``AppAuthHandler`` with the search key pair, waits on rate limits
    (with notification) and is configured with ``JSONParser``.
    """
    handler = AppAuthHandler(SEARCH_CKEY, SEARCH_CSECRET)
    options = {
        'wait_on_rate_limit': True,
        'wait_on_rate_limit_notify': True,
        'parser': tweepy.parsers.JSONParser(),
    }
    return tweepy.API(handler, **options)
Exemplo n.º 11
0
def tweepy_auth(credentials, user=False):
    """Return a tweepy auth handler built from ``credentials``.

    :param credentials: mapping with ``consumer_key`` and ``consumer_secret``;
        ``access_token`` and ``access_token_secret`` are also read when
        ``user`` is truthy.
    :param user: truthy for an ``OAuthHandler`` with the access token set,
        falsy (default) for an ``AppAuthHandler``.
    """
    if not user:
        return AppAuthHandler(credentials['consumer_key'],
                              credentials['consumer_secret'])

    user_handler = OAuthHandler(credentials['consumer_key'],
                                credentials['consumer_secret'])
    user_handler.set_access_token(credentials['access_token'],
                                  credentials['access_token_secret'])
    return user_handler
def main():
    """Crawl outward from a seed account, recording tweets and retweeters.

    Reads the consumer key and secret from the ``TWEEPY_CONSUMER_KEY`` and
    ``TWEEPY_CONSUMER_SECRET`` environment variables, stores nodes and edges
    through a SQLAlchemy session on ``amplification.db``, and runs until the
    crawl queue is empty or Ctrl+C is pressed.
    """
    consumer_key = os.environ.get('TWEEPY_CONSUMER_KEY')
    consumer_secret = os.environ.get('TWEEPY_CONSUMER_SECRET')

    # Client built on AppAuthHandler; it waits (and notifies) on rate limits.
    api_auth = AppAuthHandler(consumer_key, consumer_secret)
    app_api = API(api_auth,
                  wait_on_rate_limit_notify=True,
                  wait_on_rate_limit=True)

    # Set up the database
    database_path = 'amplification.db'
    engine = create_engine('sqlite:///{}'.format(database_path))
    session_factory = sessionmaker(bind=engine)
    session = scoped_session(session_factory)
    Base.metadata.create_all(engine)

    args = parse_args()

    try:
        # Work queue of account ids still to be crawled, seeded from the
        # command line.
        # NOTE(review): the attribute is spelled `seed_acount`; presumably it
        # matches the name defined in parse_args -- confirm.
        crawler = Queue()
        crawler.put(args.seed_acount)
        # Retweeter ids already queued, so each is enqueued at most once.
        accounts_seen = set()
        while not crawler.empty():
            account_id = crawler.get()
            tweets = get_tweets(app_api, account_id)
            tweet_crawl_date = datetime.utcnow()
            prioritized_list = process_tweets(tweets)
            for item in prioritized_list:
                # assumes each item is a sequence whose second element is the
                # tweet dict -- TODO confirm against process_tweets
                tweet = item[1]
                author = tweet['user']
                # This message is emitted once per item, each time with the
                # total length of the list.
                logger.info('Found {} amplified tweets from {}'.format(
                    len(prioritized_list), author['screen_name']))
                tweet_node = add_tweet_node(session, tweet['id'], author['id'],
                                            author['screen_name'],
                                            tweet['text'], tweet_crawl_date)
                retweeters = get_retweeters(app_api, tweet['id'])
                for retweeter in retweeters:
                    retweeter_id = retweeter['id']
                    # Only retweeters flagged by is_retweet_bot are recorded.
                    if not is_retweet_bot(app_api, retweeter_id):
                        continue

                    logger.info('\tPotential retweet bot: {}'.format(
                        retweeter['screen_name']))
                    account_crawl_date = datetime.utcnow()
                    account_node = add_account_node(session, retweeter['id'],
                                                    retweeter['screen_name'],
                                                    account_crawl_date)
                    # The edge is added for every flagged retweet, even when
                    # the account was already seen.
                    add_edge(session, account_node, tweet_node)
                    if retweeter_id in accounts_seen:
                        continue
                    accounts_seen.add(retweeter_id)
                    crawler.put(retweeter_id)

    except KeyboardInterrupt:
        print('CTRL+C received... shutting down')
Exemplo n.º 13
0
def _search_tweets_frame(api, search_word, date, lat, lang, km):
    """Run one geocoded search and return the texts as a one-column DataFrame."""
    geo = '"' + lat + ',' + lang + "," + km + '"'
    tweets = tweepy.Cursor(api.search,
                           q=search_word,
                           since=date,
                           geocode=geo,
                           lang="en",
                           tweet_mode='extended').items(1000)
    # For retweets use the full text of the original tweet.
    return pd.DataFrame([
        tweet._json['retweeted_status']['full_text']
        if 'retweeted_status' in tweet._json else tweet.full_text
        for tweet in tweets
    ],
                        columns=["Tweets"])


def getTweets(lat, lang):
    """Fetch and preprocess up to 1000 pandemic-related tweets near a point.

    Searches within 150km of the point first and retries once with 300km
    when nothing is found.

    :param lat: latitude as a string (it is concatenated into the geocode).
    :param lang: second coordinate of the point as a string; it is placed
        after ``lat`` in the geocode.
    :return: the ``Tweets`` column after ``cleanTxt`` and ``stop_words``
        have been applied to every entry.
    """
    auth = AppAuthHandler(twitter_credentials.CONSUMER_KEY,
                          twitter_credentials.CONSUMER_SECRET)
    api = tweepy.API(auth, wait_on_rate_limit=True, retry_count=3, timeout=60)

    search_word = "lockdown OR corona OR COVID OR quarantine OR WHO OR stayhome OR socialdistancing OR pandemic OR virus OR selfisolation OR lockdownbirthday"
    date = "2020-03-23"  # Tweets will be from this date

    t0 = time.time()
    df = _search_tweets_frame(api, search_word, date, lat, lang, "150km")
    t1 = time.time()
    print("time for fetching tweets:", (t1 - t0))

    if len(df.index) == 0:
        # Nothing within 150km: widen the radius once.
        print("workingg")
        df = _search_tweets_frame(api, search_word, date, lat, lang, "300km")

    # Preprocess the text: cleanTxt first, then stop_words.
    df['Tweets'] = df['Tweets'].apply(cleanTxt)
    df['Tweets'] = df['Tweets'].apply(stop_words)
    return df['Tweets']
Exemplo n.º 14
0
def main(argv):
    """Parse the command-line flags, load the credentials and run a mode.

    :param argv: list of flags. ``--test`` runs a one-time test of past
        tweets, ``--cont`` runs the continuous stream search, ``--nyc``
        enables geocoding to NYC, ``--help`` prints the usage text.
    :return: ``None``.
    :raises Exception: any error raised while opening or reading
        ``secrets.txt`` is re-raised after a hint has been printed.
    """
    global posts
    global consumer_key
    global consumer_secret
    global access_token
    global access_secret

    mode = -1
    geocode = False

    for arg in argv:
        if arg == "--test":
            mode = 0
        elif arg == "--cont":
            mode = 1
        elif arg == "--nyc":
            geocode = True
        elif arg == "--help":
            print("Usage: main.py (--test|--cont) [--nyc]")
            print("--test is used for a one time test of past tweets")
            print("--cont is used for continuous search of Twitter stream")
            print("--nyc will enable geocoding of tweets to NYC (default is everywhere)")
            return
        else:
            print("Not a valid argument: " + arg)
            return

    if mode < 0:
        print("Need to set a mode. Use --help to see commands")
        return

    # secrets.txt holds, one per line: consumer key, consumer secret,
    # access token, access secret.
    try:
        # `with` closes the file even when a read fails.
        with open('secrets.txt', 'r', encoding='utf-8') as consumer_file:
            # Strip only the line terminator. Slicing off the last character
            # would eat a real character when the final line has no trailing
            # newline, and would leave '\r' behind for CRLF files.
            consumer_key = consumer_file.readline().rstrip('\r\n')
            consumer_secret = consumer_file.readline().rstrip('\r\n')
            access_token = consumer_file.readline().rstrip('\r\n')
            access_secret = consumer_file.readline().rstrip('\r\n')
    except Exception:
        print("Error opening/reading secrets.txt")
        print("Did you create the file and enter the info in the correct format?")
        raise

    if mode == 0:
        auth = AppAuthHandler(consumer_key, consumer_secret)
        auth.apply_auth()
        runTest(auth, geocode)
    elif mode == 1:
        # Database handle used by the stream listener.
        client = MongoClient()
        db = client.twitter
        posts = db.posts
        # The continuous mode uses OAuthHandler with the access token.
        auth = OAuthHandler(consumer_key, consumer_secret)
        auth.set_access_token(access_token, access_secret)
        runCont(auth, geocode)
    return
Exemplo n.º 15
0
    def authenticate_twitter_app(self, consumer_key, consumer_secret):
        """
        Build the Twitter API auth handler.

        :param consumer_key: str
            Consumer key.
        :param consumer_secret: str
            Consumer secret key.

        :return: ``AppAuthHandler`` created from the two values
        """
        # AppAuthHandler: higher rate limit than OAuthHandler
        return AppAuthHandler(consumer_key, consumer_secret)
Exemplo n.º 16
0
    def __init__(self):
        """Load ``app_settings.json`` and create the tweepy client.

        On success ``self.auth`` and ``self.api`` hold the auth handler and
        the API client. If creating them fails, the error is printed and the
        attribute that could not be created stays ``None``. The database
        settings are copied into the module-level ``main_app_settings``.
        """
        with open("app_settings.json", "r") as app_settings_file:
            self.app_settings = jsonpickle.decode(app_settings_file.read())
        self.tweets_file = "tweets_4.json"

        # Define both attributes up front so they exist even when the
        # authentication below fails.
        self.auth = None
        self.api = None
        try:
            self.auth = AppAuthHandler(self.app_settings["consumer_key"],
                                       self.app_settings["consumer_secret"])
            self.api = tweepy.API(self.auth,
                                  wait_on_rate_limit=True,
                                  wait_on_rate_limit_notify=True)
        except Exception as exc:
            # Still best effort, but report the cause and let
            # KeyboardInterrupt / SystemExit propagate.
            print("Error: Authentication failed ({})".format(exc))

        main_app_settings["db_name"] = self.app_settings["db_name"]
        main_app_settings["db_user"] = self.app_settings["db_user"]
        main_app_settings["db_password"] = self.app_settings["db_password"]
Exemplo n.º 17
0
    def __init__(self):
        """Create ``self.auth`` and ``self.api`` from the placeholder keys.

        Exits the process with status -1 when the API object is falsy.
        """
        # API keys from the Twitter Dev Console
        # NOTE : Add yours here
        api_key = 'YOUR API KEY HERE'
        api_key_secret = 'YOUR API SECRET HERE'

        self.auth = AppAuthHandler(api_key, api_key_secret)
        # The client waits (and notifies) when the rate limit is reached.
        self.api = tweepy.API(self.auth,
                              wait_on_rate_limit_notify=True,
                              wait_on_rate_limit=True)
        if self.api:
            return

        print("Can't authenticate")
        sys.exit(-1)
Exemplo n.º 18
0
    def __init__(self):
        """Create ``self.auth`` and ``self.api`` from the embedded keys.

        Exits the process with status -1 when the API object is falsy.
        """
        # API keys from the Twitter Dev Console
        # NOTE(review): these credentials are hard-coded in source; consider
        # loading them from the environment or a config file.
        api_key = 'KOtMxbHtuRodKEHt7aZLdBAFi'
        api_key_secret = 'nCpjqbtVIXvlxKuH9jd7I5X8ybx6fFnsAWmf1dZjwVrXWrxrOU'

        self.auth = AppAuthHandler(api_key, api_key_secret)
        # The client waits (and notifies) when the rate limit is reached.
        self.api = tweepy.API(self.auth,
                              wait_on_rate_limit_notify=True,
                              wait_on_rate_limit=True)
        if self.api:
            return

        print("Can't authenticate")
        sys.exit(-1)
    def __init__(self):
        """Set up ``self.auth`` and ``self.api`` from the placeholder keys."""
        # Keys and tokens from the Twitter Dev Console - REPLACE X's with your code!
        consumer_key = 'XXXXXXXXXXXXXXXXXXXXXXXXX'
        consumer_secret = 'XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX'
        # The access token pair is not used by AppAuthHandler below.
        access_token = 'XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX'
        access_secret = 'XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX'

        # AppAuthHandler takes only the consumer pair.
        self.auth = AppAuthHandler(consumer_key, consumer_secret)
        # API object used to fetch tweets; waits (and notifies) on rate limits.
        self.api = tweepy.API(self.auth,
                              wait_on_rate_limit=True,
                              wait_on_rate_limit_notify=True)
Exemplo n.º 20
0
    def get_fresh_connection(self):
        """Switch to the next credential set and return a new API client.

        Advances ``self.index`` round-robin through ``self.auth_data``,
        stores the new client on ``self.conn_`` and resets ``self.nreqs`` to
        0. When tweepy raises ``TweepError`` the error is printed and, after
        a 10 second pause, the following credential set is tried; the method
        only returns once a client has been created.

        :return: the new ``API`` client (also stored as ``self.conn_``).
        """
        # Loop until a client is created; the success path returns directly.
        while True:
            try:
                self.index = (self.index + 1) % len(self.auth_data)
                d = self.auth_data[self.index]
                # print("Switching to API Credentials #%d" % self.index)

                auth = AppAuthHandler(d['consumer_key'], d['consumer_secret'])
                self.conn_ = API(auth_handler=auth,
                                 wait_on_rate_limit=False,
                                 wait_on_rate_limit_notify=True)
                self.nreqs = 0
                return self.conn_
            except TweepError as e:
                # Format the exception itself: Python 3 exceptions have no
                # `.message` attribute, so reading it would raise
                # AttributeError from inside this handler.
                print("Error trying to connect: %s" % e)
                time.sleep(10)
Exemplo n.º 21
0
def get_api_pool(account_config: list) -> Queue:
    """Build a queue of tweepy ``API`` clients from the account config.

    For every entry, one client using ``AppAuthHandler`` is queued first,
    followed by one ``OAuthHandler`` client per item in the entry's
    ``user_auth`` list. All clients wait on rate limits.
    """
    pool = Queue()
    for account in account_config:
        consumer_key = account["consumer_key"]
        consumer_secret = account["consumer_secret"]

        # Client for the app itself.
        app_handler = AppAuthHandler(consumer_key, consumer_secret)
        pool.put(API(app_handler, wait_on_rate_limit=True))

        # One client per user grant registered for this app.
        for grant in account["user_auth"]:
            user_handler = OAuthHandler(consumer_key, consumer_secret)
            user_handler.set_access_token(grant["access_key"],
                                          grant["access_secret"])
            pool.put(API(user_handler, wait_on_rate_limit=True))

    return pool
Exemplo n.º 22
0
def authenticate(api_key,
                 api_key_secret,
                 access_token,
                 access_token_secret,
                 wait_on_rate_limit=True,
                 wait_on_rate_limit_notify=True):
    """Create a tweepy ``API`` client using ``AppAuthHandler``.

    :param api_key: consumer key.
    :param api_key_secret: consumer secret.
    :param access_token: accepted for interface compatibility; not used.
    :param access_token_secret: accepted for interface compatibility; not
        used.
    :param wait_on_rate_limit: forwarded to ``tweepy.API``.
    :param wait_on_rate_limit_notify: forwarded to ``tweepy.API``.
    :return: the ``tweepy.API`` client, or ``None`` when the handler or the
        client could not be created (a message is printed in that case).
    """
    try:
        _auth = AppAuthHandler(api_key, api_key_secret)
    except Exception:
        print("Faild to authenticate.")
        return None

    try:
        # Forward the caller's rate-limit flags; they used to be ignored in
        # favour of hard-coded True values.
        return tweepy.API(auth_handler=_auth,
                          wait_on_rate_limit=wait_on_rate_limit,
                          wait_on_rate_limit_notify=wait_on_rate_limit_notify)
    except Exception:
        print('Failed to access Twitter API')
        return None
Exemplo n.º 23
0
def main():
    # Entry point of the search harvester: configure logging, open the
    # existing CouchDB database, create the tweepy client and run search()
    # once.
    # The log file is opened with mode 'w' at DEBUG level.
    logging.basicConfig(filename='search_harvest.log',
                        filemode='w',
                        level=logging.DEBUG)

    # access database (it is looked up, not created, here)
    couch = couchdb.Server(settings.database_address)
    db = couch[settings.database]

    # application-only authentication
    auth = AppAuthHandler(settings.consumer_key1, settings.consumer_secret1)
    api = tweepy.API(auth,
                     wait_on_rate_limit=True,
                     wait_on_rate_limit_notify=True)

    # start twitter harvester; progress goes to stdout and to the log
    print 'start'
    logging.info('start harvesting')
    search(db, api)
    logging.info('finish harvesting')
    print 'finished'
Exemplo n.º 24
0
    def __init__(self, consumer_key, consumer_secret, access_token,
                 access_token_secret, botometer_key):
        """Store the credentials and create the API clients and DB connection.

        Creates ``self._oauth_api`` (``OAuthHandler`` with access token),
        ``self._appauth_api`` (``AppAuthHandler``, waits on rate limits),
        ``self._botometer`` and a psycopg2 connection with its cursor.
        """
        # Keep the keys and secrets on the instance.
        self._consumer_key = consumer_key
        self._consumer_secret = consumer_secret
        self._access_token = access_token
        self._access_token_secret = access_token_secret
        self._botometer_key = botometer_key

        # OAuth client; slow but must be used for almost all kinds of
        # requests.
        user_auth = OAuthHandler(self._consumer_key, self._consumer_secret)
        user_auth.set_access_token(self._access_token,
                                   self._access_token_secret)
        self._oauth_api = API(user_auth)

        # AppAuth client; faster and also allows for more requests, but can
        # only be used for certain stuff.
        app_auth = AppAuthHandler(self._consumer_key, self._consumer_secret)
        app_auth.secure = True
        self._appauth_api = API(app_auth,
                                wait_on_rate_limit_notify=True,
                                wait_on_rate_limit=True)

        # Botometer client, given the same Twitter credentials.
        self._botometer = botometer.Botometer(
            consumer_key=self._consumer_key,
            consumer_secret=self._consumer_secret,
            access_token=self._access_token,
            access_token_secret=self._access_token_secret,
            mashape_key=self._botometer_key,
            wait_on_ratelimit=True)

        # Constants and filters.
        self._bot_threshold = 0.6
        self._mentions_threshold = 100

        # Database connection and cursor.
        self.connection = psycopg2.connect(**consts.db_creds)
        self.cursor = self.connection.cursor()
Exemplo n.º 25
0
def authenticate_credential(c):
    """
    Build the Twitter API client for one credential set.

    :param c: mapping providing ``CONSUMER_KEY`` and ``CONSUMER_SECRET``
    :return: tweepy ``API`` object, or ``False`` when the object is falsy
    """
    app_auth = AppAuthHandler(c['CONSUMER_KEY'], c['CONSUMER_SECRET'])

    # Retry behaviour comes from the CREDENTIAL section of the config.
    retry_count = int(config.get('CREDENTIAL', 'RETRY_COUNT'))
    retry_delay = int(config.get('CREDENTIAL', 'RETRY_DELAY'))

    api = API(app_auth,
              compression=True,
              retry_count=retry_count,
              retry_delay=retry_delay)
    if api:
        return api

    logger.error("Authentication error for twitter client")
    return False
Exemplo n.º 26
0
    def __init__(self, cfg):
        """Initialise the parent with an ``AppAuthHandler`` and load settings.

        :param cfg: configuration object; this method reads ``AUTH``,
            ``WAIT_RATE``, ``SEARCH``, ``TWEET_ATTR``, ``USER_ATTR``,
            ``DF_COL_ORDER``, ``DROP_ARGS`` and ``ENV`` from it.
        """

        # OAuth-Application: cfg.AUTH is expanded into the handler's keyword
        # arguments, cfg.WAIT_RATE into the parent initializer's.
        auth = AppAuthHandler(**cfg.AUTH)
        super().__init__(auth, **cfg.WAIT_RATE)

        self.search_settings = cfg.SEARCH
        self.tweet_attr = cfg.TWEET_ATTR
        self.user_attr = cfg.USER_ATTR
        self.order = cfg.DF_COL_ORDER
        self.drop = cfg.DROP_ARGS
        # Today's date as YYYY-MM-DD; it becomes part of the CSV file name.
        self.today = date.today().strftime("%Y-%m-%d")

        #self.demo_user = "******"

        # CSV file name is prefix + date + postfix; its path is placed under
        # HOME_DIR / OUTPUT_DIR.
        self.csv_name = cfg.ENV.CSV_PREFIX + self.today + cfg.ENV.CSV_POSTFIX
        self.csv_path = os.path.join(HOME_DIR, cfg.ENV.OUTPUT_DIR,
                                     self.csv_name)
        self.tweets = ""

        # NOTE(review): the directory is checked/created as cfg.ENV.OUTPUT_DIR
        # (without HOME_DIR), while csv_path above is joined onto HOME_DIR --
        # confirm both refer to the same location.
        if not os.path.isdir(cfg.ENV.OUTPUT_DIR):
            os.makedirs(cfg.ENV.OUTPUT_DIR)

        self.set_since_id()
Exemplo n.º 27
0
def main():
    """Start the account-discovery processes and persist what they find.

    Requires the four ``TWEEPY_*`` environment variables; exits with status 1
    when any of them is missing. Depending on the parsed arguments it starts
    the stream, enumeration and accounts-from-file processes, then loops
    forever reading account dicts from the ``accounts`` queue, saving new
    ones and writing them to the account streamer. Ctrl+C terminates the
    child processes and closes the streamer.
    """
    logging.basicConfig(
        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
        datefmt='%m/%d/%Y %I:%M:%S %p',
        level=logging.INFO)
    logger = logging.getLogger(__name__)

    args = parse_args()
    consumer_key = os.environ.get('TWEEPY_CONSUMER_KEY')
    consumer_secret = os.environ.get('TWEEPY_CONSUMER_SECRET')
    access_token = os.environ.get('TWEEPY_ACCESS_TOKEN')
    access_token_secret = os.environ.get('TWEEPY_ACCESS_TOKEN_SECRET')

    # All four values must be set and non-empty.
    if not (consumer_key and consumer_secret and access_token
            and access_token_secret):
        logger.error('Need to specify the OAuth configuration.')
        sys.exit(1)

    # Client using OAuthHandler with the access token; handed to the worker
    # processes below.
    user_auth = OAuthHandler(consumer_key, consumer_secret)
    user_auth.set_access_token(access_token, access_token_secret)
    user_api = API(
        user_auth, wait_on_rate_limit_notify=True, wait_on_rate_limit=True)

    # Client using AppAuthHandler; in this function it is referenced only by
    # the commented-out processes further down.
    api_auth = AppAuthHandler(consumer_key, consumer_secret)
    app_api = API(
        api_auth, wait_on_rate_limit_notify=True, wait_on_rate_limit=True)

    account_queue = RedisQueue('accounts')
    lookup_queue = RedisQueue('lookup')

    # Output writer: JSONStreamer by default, StdoutStreamer with args.stdout.
    streamer_class = JSONStreamer
    if args.stdout:
        streamer_class = StdoutStreamer

    account_streamer = streamer_class(args.account_filename)

    processes = []

    # Optional producer: start_streamer.
    if args.stream:
        stream_process = Process(
            target=start_streamer,
            args=[user_api, account_queue, lookup_queue],
            kwargs={'query': args.stream_query})
        processes.append(stream_process)
    else:
        logger.info('Skipping stream')

    # Optional producer: fetch_accounts over an id range.
    if args.enum:
        enumerate_process = Process(
            target=fetch_accounts,
            args=[user_api, account_queue],
            kwargs={
                'min_id': args.min_id,
                'max_id': args.max_id,
                'percentage': args.enum_percentage
            })
        processes.append(enumerate_process)
    else:
        logger.info('Skipping enum')

    # Optional producer: fetch_accounts_from_file.
    if args.accounts_from_file:
        accounts_from_file_process = Process(
            target=fetch_accounts_from_file,
            args=[user_api, account_queue],
            kwargs={
                'account_provision_file': args.account_provision_file
            })
        processes.append(accounts_from_file_process)
    else:
        logger.info('Skipping accounts from file')

    # if args.tweets:
    #     fetch_tweets_process = Process(
    #         target=fetch_tweets,
    #         args=[app_api, tweet_streamer],
    #         kwargs={
    #             'lookup_queue': lookup_queue,
    #             'minimum_tweets': args.min_tweets
    #         },
    #     )
    #     processes.append(fetch_tweets_process)
    # else:
    #     logger.info('Skipping tweets')

    # lookup_account_process = Process(
    #     target=start_lookup, args=[app_api, lookup_queue, account_queue])
    # processes.append(lookup_account_process)

    for p in processes:
        p.start()

    # The main loop's job is simple - it simply fetches account dicts coming
    # from the various processes and saves them to the database so the tweet
    # fetcher can process them.
    try:
        account_count = 0
        while True:
            try:
                account = account_queue.get()
                # Verify the account isn't already in our database
                if Account.exists(account['id']):
                    continue
                account_count += 1
                # Progress message every CHECKIN_THRESHOLD new accounts.
                if account_count % CHECKIN_THRESHOLD == 0:
                    logger.info(
                        'Accounts discovered: {}'.format(account_count))
                # Add the account to our database cache
                Account.from_dict(account).save()
                # Write the account to our account streamer
                account_streamer.write_row(account)
            except Exception as e:
                # Report the failure and keep consuming the queue.
                print('Error fetching account: {}'.format(e))
    except KeyboardInterrupt:
        # Stop every child process before closing the output.
        print('\nCtrl+C received. Shutting down...')
        for p in processes:
            p.terminate()
            p.join()
        account_streamer.close()
Exemplo n.º 28
0
@author: prudhveer
"""

import json
import os
import sys
import tweepy
from tweepy import API
from tweepy import OAuthHandler
from tweepy import AppAuthHandler
from tweepy import Cursor

# NOTE(review): these credentials are hard-coded in source; consider loading
# them from the environment or a config file.
consumer_key = 'bFfMzi17BoJp6KmtYJELlXP2K'
consumer_secret = 'lVga8tf5WLoQArnsXg7kZ0NBOq11UuTCUYW3tPpeSxmHwtEO2r'
# The handler and the client are created at import time; the client waits
# (and notifies) when the rate limit is reached.
auth = AppAuthHandler(consumer_key, consumer_secret)
api = API(auth, wait_on_rate_limit=True, wait_on_rate_limit_notify=True)

# When run as a script, the first command-line argument is the user name and
# determines the output file name.
if __name__ == '__main__':
    user = sys.argv[1]
    #client=get_twitter_client()
    fname = "{}_only_tweets.json".format(user)

# Maps every code point from 0x10000 up to sys.maxunicode to 0xfffd.
non_bmp_map = dict.fromkeys(range(0x10000, sys.maxunicode + 1), 0xfffd)
# Module-level lists, initially empty.
replies = []
replies1 = []
replies2 = []


def get_reply1(user, tweet_id):
    # NOTE(review): only this first statement is visible; the rest of the body
    # appears to be cut off, so the function's purpose cannot be confirmed.
    # The given tweet id is copied into a local named sinceID.
    sinceID = tweet_id
Exemplo n.º 29
0
import tweepy
from tweepy import AppAuthHandler
from pymongo import MongoClient

# Twitter application credentials used to build the module-level API client.
# NOTE(review): these secrets (including the commented-out user tokens) are
# hard-coded in source; consider moving them to the environment or an
# untracked config file and rotating the keys.
consumer_key = 'AtSpb7cH7fxCULkMZigy3w4LB'
consumer_secret = 'y7KI2sJfskZZzyQ4RhImnB3dPqaxVXp1IqNp21IxR7oATH8nVA'
#access_token = '965458435033653248-ZWv7CACfWAVQEq8bTzHlFifEijjhYmO'
#access_secret = 'L7TNG2GpyVFwlCngKTdP2RQ5cn1fmw3lfBpxg8zbJlVPA'
 
# Application-only auth is used instead of the user access token (left
# commented out), per the original author to increase the rate of queries.
auth = AppAuthHandler(consumer_key, consumer_secret) #To increase the rate of queries
#auth.set_access_token(access_token, access_secret)
api = tweepy.API(auth, wait_on_rate_limit=True,wait_on_rate_limit_notify=True)

# Only prints a message; execution continues even if the check fires.
# NOTE(review): confirm an API instance can ever be falsy - this may be dead code.
if (not api): #Error handling
	print ("Problem connecting to API")

#Connect to Mongodb
# The client and database handle are module-level globals shared by the
# functions below.
client = MongoClient('mongodb://localhost/twitterdb')  #assuming there is already a database called twitterdb
db=client.twitterdb

#Gets replies to all tweets collected
def get_reply(user, list_tweet_ids):
    """Store replies addressed to ``user`` that answer one of the given tweets.

    Searches for ``to:<user>`` and, for every result whose
    ``in_reply_to_status_id_str`` is contained in ``list_tweet_ids``, inserts
    the raw tweet JSON into ``db.trump_replies`` (user ``realDonaldTrump``) or
    ``db.elon_replies`` (user ``elonmusk``). For any other user nothing is
    stored.
    """
    print("Getting Reply tweets of user "+user)
    c = 0  # Counter (not updated anywhere in this function)
    search_results = tweepy.Cursor(api.search, q='to:' + user).items()
    for status in search_results:
        payload = status._json
        # Skip results that carry no reply reference at all.
        if 'in_reply_to_status_id_str' not in payload:
            continue
        # Skip replies to tweets outside the collected set.
        if payload['in_reply_to_status_id_str'] not in list_tweet_ids:
            continue
        if user == 'realDonaldTrump':
            db.trump_replies.insert_one(payload)
        if user == 'elonmusk':
            db.elon_replies.insert_one(payload)
Exemplo n.º 30
0
import datetime
import json
import gzip
import sys
import time
from tweepy import API
from tweepy import AppAuthHandler
from tweepy import Cursor

# Twitter credentials for the module-level API client.
# NOTE(review): these secrets are hard-coded in source; consider loading them
# from the environment or an untracked config file and rotating the keys.
CONSUMER_KEY = 'NHsKGfxrXTXlf2mfH2n0jbW1l'
CONSUMER_SECRET = 'W0HE0cTlfIcJtkIX5hClcH4ILgyv018Q8fWdo0sgRo5bdFzAMA'
# ACCESS_TOKEN / ACCESS_SECRET are not referenced by the visible code: only
# the consumer key and secret are passed to AppAuthHandler below.
ACCESS_TOKEN = '339641100-VOI2SsKVbSsQIfnHNSDohSJ4aB9rJpSXkDeYaeo3'
ACCESS_SECRET = 'wMUjClr78yjlsyWIjxVunFKQ8zYOjlzgMfItuRiec5Y3c'

# The auth handler and API client are created at import time.
auth = AppAuthHandler(CONSUMER_KEY, CONSUMER_SECRET)
api = API(auth, wait_on_rate_limit=True, wait_on_rate_limit_notify=True)

# Search query: any of the four hashtags.
q = '#facebookgate OR #cambridgeanalytica OR ' \
    '#deletefacebook OR #Zuckerberg'
# `items` caps how many tweets are pulled from the search cursor further down;
# the use of `item_counter` is not visible in this part of the file.
items, item_counter = 1000000, 1
# Remaining /search/tweets calls in the current rate-limit window, read once
# at start-up from the rate-limit status endpoint.
remaining_searches = int(api.rate_limit_status()['resources']['search']
                         ['/search/tweets']['remaining'])
# Date range comes from the first two command-line arguments.
# NOTE(review): raises IndexError when fewer than two arguments are supplied.
since, until = sys.argv[1], sys.argv[2]
# The output name uses the third '-'-separated field of each argument
# (the day, assuming YYYY-MM-DD input - TODO confirm).
fname = 'tweets-' + since.split('-')[2] + '-' + until.split('-')[2]

with gzip.open('../testing/' + fname + '.json.gz', 'w') as f:
    f.write('{ "tweets": [')
    for tweet in Cursor(api.search, q=q, since=since, until=until,
                        count=100).items(items):
        f.write(
            json.dumps(tweet._json, separators=(',', ': '), sort_keys=True) +
Exemplo n.º 31
0
    def extract_historic_tweets(self, keywords):
        """Download past tweets matching any of ``keywords`` via the search API.

        The keywords are joined with ``' OR '`` into one query. Results are
        fetched ``tweets_per_query`` at a time; after each page ``max_id`` is
        moved just below the last tweet received so the next request
        continues from there.

        Each tweet is written as one JSON document per line to
        ``<tweets_data_path>/<event>/data_historic.json`` when
        ``self.tweets_data_path`` is set. Otherwise, when
        ``self.client_socket`` is set, each tweet object is passed to
        ``self.client_socket.send``.

        Args:
            keywords: Iterable of search-term strings.

        Returns:
            None. Progress and the final count are printed to stdout. Any
            ``TweepError`` is printed and ends the download early.
        """
        # Get twitter handle
        auth = AppAuthHandler(self.consumer_key, self.consumer_secret)
        api = API(auth,
                  wait_on_rate_limit=True,
                  wait_on_rate_limit_notify=True)
        if not api:
            print("Error while authenticating with twitter")
            sys.exit(-1)

        # Form search query from the keywords
        search_query = ' OR '.join(keywords)

        # output_file stays None when no data path is configured, so the
        # socket-only mode no longer fails on an unbound name.
        output_file = None
        if self.tweets_data_path:
            # Create the output directory/file name for the raw twitter data
            output_dir = self.tweets_data_path + '/' + self.event + '/'
            output_file = output_dir + 'data_historic.json'
            os.makedirs(output_dir, exist_ok=True)

        # Set query params
        max_tweets = 1000000000  # Some arbitrary large number
        tweets_per_query = 100  # this is the max the API permits

        # Params for maintaining context/continuity across the entire query:
        #   1. If results from a specific ID onwards are required, set
        #      since_id to that ID; otherwise there is no lower limit and the
        #      search goes as far back as the API allows.
        #   2. If only results below a specific ID are required, set max_id
        #      to that ID; otherwise there is no upper limit and the search
        #      starts from the most recent matching tweet.
        since_id = None
        max_id = -1

        # Execute the search query (tweets_per_query at a time)
        tweet_count = 0
        out_file = open(output_file, 'w') if output_file else None
        try:
            while tweet_count < max_tweets:
                try:
                    # Only send the paging bounds that are actually set.
                    search_kwargs = {'q': search_query,
                                     'count': tweets_per_query}
                    if max_id > 0:
                        search_kwargs['max_id'] = str(max_id - 1)
                    if since_id:
                        search_kwargs['since_id'] = since_id
                    new_tweets = api.search(**search_kwargs)

                    if not new_tweets:
                        print("No more tweets found")
                        break

                    # Write tweets to file or socket
                    for tweet in new_tweets:
                        if out_file is not None:
                            out_file.write(
                                jsonpickle.encode(tweet._json,
                                                  unpicklable=False))
                            out_file.write('\n')
                        elif self.client_socket:
                            # NOTE(review): the tweet object itself is sent,
                            # not serialized data - confirm the receiver
                            # accepts it.
                            self.client_socket.send(tweet)

                    tweet_count += len(new_tweets)
                    print("Downloaded {0} tweets".format(tweet_count))
                    # Continue below the last tweet of this page.
                    max_id = new_tweets[-1].id
                except TweepError as e:
                    # Just exit if any error
                    print("some error : " + str(e))
                    break
        finally:
            if out_file is not None:
                out_file.close()

        if output_file:
            print("Downloaded {0} tweets, Saved to {1}".format(
                tweet_count, output_file))
        else:
            print("Downloaded {0} tweets".format(tweet_count))
Exemplo n.º 32
0
# Directory that holds the local `config` module with the API credentials.
# NOTE(review): this is an absolute, user-specific path; the script only runs
# where that directory exists.
TWITDIR = '/home/luke/programming/'

# get the Twitter API app Oauth tokens
# The directory must be put on sys.path before `import config` can find it.
sys.path.insert(0, TWITDIR)
import config

# Twitter Setup
# User-context auth: consumer credentials plus the user's access token.
import tweepy
from tweepy import OAuthHandler
auth = OAuthHandler(config.alt_consumer_key, config.alt_consumer_secret)
auth.set_access_token(config.alt_access_token, config.alt_access_secret)

# allows greater access speed
# Application-only auth built from the same consumer credentials.
from tweepy import AppAuthHandler
auth_app = AppAuthHandler(config.alt_consumer_key, config.alt_consumer_secret)

# choose which of above auth methods we are using:
# The active client uses the application-only handler; `auth` is only needed
# by the commented-out client below.
api = tweepy.API(auth_app, wait_on_rate_limit = True, \
    wait_on_rate_limit_notify = True )

# uncomment to get places etc
# api_oauth = tweepy.API(auth, wait_on_rate_limit = True, \
#     wait_on_rate_limit_notify = True )


# The JSON response from the Twitter API is available in the attribute
# _json (with a leading underscore), which is not raw JSON but a dictionary.
try:
    client = MongoClient(config.MONGO_URI)
except e: