Exemplo n.º 1
0
    def connect_CouchDB():
        """Connect to the CouchDB server and ensure the tweets database exists.

        Connection settings come from ``core.config('couchdb')``:
        ``ip_address`` and ``port`` are always read, and ``login`` and
        ``password`` are embedded in the URL when both are present. The
        ``tweets_all`` database is opened, or created when the server
        reports that it does not exist.

        Returns:
            The ``couchdb.Server`` handle for the configured server.

        Raises:
            ConnectionRefusedError: No server answered at the address.
            couchdb.http.Unauthorized: The server rejected the request.
            Exception: Any other failure is reported (for the server
                connection) and re-raised unchanged.
        """
        args = core.config('couchdb')
        db_str = "tweets_all"
        protocol = 'http'
        # Printable URL: it never contains the login or the password
        url = url_safe = '{protocol}://{ip}:{port}/'.format(
            protocol=protocol,
            ip=args['ip_address'],
            port=str(args['port'])
        )
        # Use an authenticated URL if a login and password are supplied
        if ('login' in args) and ('password' in args):
            url = '{protocol}://{login}:{password}@{ip}:{port}/'.format(
                protocol=protocol,
                login=args['login'],
                password=args['password'],
                ip=args['ip_address'],
                port=str(args['port'])
            )
        # Attempt to connect to the CouchDB server
        try:
            couch = couchdb.Server(url)
            # Constructing couchdb.Server does not contact the server, so
            # issue a GET to verify the connection before reporting success
            couch.version()
            print("Connected to CouchDB server at " + url_safe)
        except ConnectionRefusedError:
            print("No CouchDB server at " + url_safe)
            raise
        except couchdb.http.Unauthorized as e:
            print("Connection to CouchDB server refused: " + str(e))
            raise
        except Exception as e:
            print(
                "Failed to connect to CouchDB server at "
                + url_safe
                + ". An unexpected exception was raised: "
                + str(e)
            )
            raise
        # Open the database, creating it when it does not exist yet. Any
        # other error (including Unauthorized) propagates to the caller.
        try:
            # Looked up only to check that the database exists
            couch[db_str]
        except couchdb.http.ResourceNotFound:
            couch.create(db_str)
            print ("Creating new database: " + db_str)
        else:
            print ("Connected to database: " + db_str)
        return couch
Exemplo n.º 2
0
 def __init__(self, bucket_name):
     """Open an S3 connection and a handle to the bucket *bucket_name*.

     Credentials and endpoint settings are read from
     ``core.config('nectar')``. Any error raised while connecting or
     opening the bucket propagates to the caller unchanged.

     Args:
         bucket_name: Name of the bucket to open.
     """
     args = core.config('nectar')
     self.bucket_name = bucket_name
     s3 = boto.s3.connection
     self.connection = s3.S3Connection(
         aws_access_key_id=args['ec2_access_key'],
         aws_secret_access_key=args['ec2_secret_key'],
         port=int(args['s3']['port']),
         host=args['s3']['host'],
         is_secure=True,
         validate_certs=False,
         calling_format=s3.OrdinaryCallingFormat()
     )
     # get_bucket() requires validate=False to work with python3
     self.bucket = self.connection.get_bucket(self.bucket_name,
                                              validate=False)
     # Remember the URL used for public access
     self.public_url = args['s3']['public_url']
Exemplo n.º 3
0
    def __init__(self):
        """Authenticate with Twitter and set up the harvester's state.

        OAuth keys are read from ``core.config('twitter', 'OAuth')``. The
        credentials are verified immediately; a ``tweepy.TweepError`` is
        reported and re-raised, and any other error propagates unchanged.
        After authentication the database handles, the sentiment analyser
        and the outlet lookup are created.
        """
        oauth = core.config('twitter', 'OAuth')
        # Initialise Twitter communication
        handler = tweepy.OAuthHandler(oauth['consumer_key'],
                                      oauth['consumer_secret'])
        handler.set_access_token(oauth['access_token'],
                                 oauth['access_token_secret'])
        try:
            self.api = tweepy.API(handler,
                                  wait_on_rate_limit=True,
                                  wait_on_rate_limit_notify=True)
            # The tweepy.API constructor does not seem to raise on an OAuth
            # failure, so validate the keys with verify_credentials()
            account = self.api.verify_credentials()
            print("OAuth connection with Twitter established through user @" +
                  account.screen_name + "\n")
        except tweepy.TweepError as oauth_error:
            print("OAuth connection with Twitter could not be established")
            raise oauth_error

        # Database handles
        self.db_tweets = db('tweets')
        self.db_tweets_urls = db('tweets_urls')
        self.db_tweets_archive = db('tweets_archive')
        self.db_outlets = db('outlets')
        self.db_articles = db('articles')
        self.senti = SentimentAnalyser()

        # Outlet lookup and harvesting state
        self.id_to_outlet = self.db_outlets.get_users()
        self._update_since_ids()
        self.source_ext = {'api': [], 'wa': {}}
Exemplo n.º 4
0
def load_api():
    ''' Return a tweepy API client authorised with the configured OAuth keys.

    The four keys (consumer_key, consumer_secret, access_token and
    access_secret) are read from core.config('twitter', 'OAuth').
    '''
    oauth = core.config('twitter', 'OAuth')
    key_names = ('consumer_key', 'consumer_secret',
                 'access_token', 'access_secret')
    # Read every key up front so a missing one fails before any tweepy call
    consumer_key, consumer_secret, access_token, access_secret = [
        oauth[name] for name in key_names
    ]

    handler = OAuthHandler(consumer_key, consumer_secret)
    handler.set_access_token(access_token, access_secret)
    # Hand the authorised handler to tweepy
    return tweepy.API(handler)
Exemplo n.º 5
0
    def iterate_followers(self):
        """Harvest follower timelines for this node's share of the outlets.

        The user list is downloaded from the outlets database. When both
        ``core.config('node')`` and ``core.config('processes')`` are set,
        the list is split into ``processes`` contiguous chunks and only the
        chunk belonging to ``node`` is processed; otherwise every user is
        processed. ``iterate_follower_timelines`` is called once per user.

        NOTE(review): node indices are assumed to be zero-based
        (0 .. processes - 1), as implied by ``start = node * chunk``.
        """
        users = list(self.db_outlets.get_users())
        total = len(users)

        node = core.config('node')
        processes = core.config('processes')

        if node is not None and processes is not None:
            node = int(node)
            processes = int(processes)
            chunk = total // processes
            start = node * chunk
            if node == processes - 1:
                # Floor division leaves total % processes users unassigned;
                # the last node takes them so no user is skipped.
                end = total
            else:
                end = start + chunk
        else:
            start, end = 0, total

        for user in users[start:end]:
            self.iterate_follower_timelines(user)
Exemplo n.º 6
0
 def connect(self):
     """Connect to the CouchDB server and open (or create) the database.

     Settings are read from ``core.config('couchdb')``: ``ip_address``
     and ``port`` are required; ``https``, ``login`` and ``password``
     are optional. On success ``self._db`` holds the database named
     ``self.db_str`` and the design document is refreshed through
     ``self.update_views()``. Every failure is re-raised to the caller.
     """
     args = core.config('couchdb')
     # HTTPS is used only when the config sets https to exactly True
     use_https = 'https' in args and args['https'] is True
     protocol = 'https' if use_https else 'http'
     # Safe URL for printing (no login/password)
     url_safe = '{protocol}://{ip}:{port}/'.format(
         protocol=protocol,
         ip=args['ip_address'],
         port=str(args['port'])
     )
     url = url_safe
     # Switch to an authenticated URL when credentials are configured
     if 'login' in args and 'password' in args:
         url = '{protocol}://{login}:{password}@{ip}:{port}/'.format(
             protocol=protocol,
             login=args['login'],
             password=args['password'],
             ip=args['ip_address'],
             port=str(args['port'])
         )
     # Server connection
     try:
         # couchdb.Server() itself never raises, so request the server
         # version to find out whether the connection really works
         couch = couchdb.Server(url)
         couch.version()
         print("Connected to CouchDB server at " + url_safe)
     except ConnectionRefusedError:
         print("No CouchDB server at " + url_safe)
         raise
     except couchdb.http.Unauthorized as err:
         print("Connection to CouchDB server refused: " + str(err))
         raise
     except Exception as err:
         print(
             "Failed to connect to CouchDB server at "
             + url_safe
             + ". An unexpected exception was raised: "
             + str(err)
         )
         raise
     # Database connection. The python-couchdb docs say PreconditionFailed
     # is raised for a missing DB, but in practice ResourceNotFound is
     # thrown (CouchDB == 1.0.1). Other errors propagate unchanged.
     try:
         self._db = couch[self.db_str]
     except couchdb.http.ResourceNotFound:
         self._db = couch.create(self.db_str)
         print ("Creating new database: " + self.db_str)
     else:
         print ("Connected to database: " + self.db_str)
     # Refresh the design document
     self.update_views()
Exemplo n.º 7
0
    def store_tweet(self, tweet_status, source=None):
        """Analyses and stores a tweet in the database.

        The tweet is annotated with provenance data, outlet relations
        (author, reply target, mentions), a UNIX timestamp, sentiment and,
        when coordinates are present, a geocode. It is then stored in the
        archive database if it is older than the configured number of
        days, otherwise in the live tweets database. Tweets carrying a
        ``url`` annotation are additionally stored in the URLs database.

        Args:
            tweet_status (tweepy.Status): The status to store.
            source (dict): Provenance data to store in the tweet. It is
                deep-copied, so the caller's dict is never modified.
                Defaults to ``{'api': [], 'wa': {}}``.

        Returns:
            The response of the final ``store_tweet`` call on the archive
            or live database.

        Raises:
            KeyError: If no timestamp could be derived from the tweet
                (the conversion failure itself is only reported).
        """
        # Local import keeps this method self-contained
        import copy

        # Round-trip through JSON to get an independent plain dict
        tweet = json.loads(json.dumps(tweet_status._json))
        print("Processing tweet: " + tweet['id_str'])
        # Add source to tweet
        if source is None:
            source = {'api': [], 'wa': {}}
        # Deep copy: a shallow copy would share the nested 'wa' dict, so the
        # annotations written below would leak into the caller's source and
        # from there into every later tweet stored with the same source.
        tweet.update(copy.deepcopy(source))
        # Make sure the annotation dict exists even for a partial source
        tweet.setdefault('wa', {})
        # If this tweet is from one of our outlets, store the outlet
        try:
            author_id = tweet['user']['id_str']
            if author_id in self.id_to_outlet:
                tweet['wa']['outlet'] = self.id_to_outlet[author_id]
        except Exception:
            print("Warning: An unexpected Exception was raised in " +
                  "checking for and assigning id_to_outlet.")
        # If this tweet is replying to one of our outlets, store the outlet
        try:
            reply_id = tweet['in_reply_to_user_id_str']
            if reply_id in self.id_to_outlet:
                tweet['wa']['reply_to'] = self.id_to_outlet[reply_id]
        except KeyError:
            pass
        except Exception:
            print("Warning: An unexpected Exception was raised in " +
                  "checking for and assigning id_to_outlet.")
        # If this tweet mentions one of our outlets, store the outlet(s)
        try:
            for mention in tweet['entities']['user_mentions'] or []:
                mention_id = mention['id_str']
                if mention_id is not None and mention_id in self.id_to_outlet:
                    tweet['wa'].setdefault('mentions', []).append(
                        self.id_to_outlet[mention_id])
        except Exception:
            # Best effort: a malformed entities section is not fatal
            pass
        # Convert the creation time to a UNIX timestamp
        try:
            tweet['wa']['time'] = core.get_time(tweet['created_at'])
        except Exception as e:
            print("Warning: An unexpected Exception was raised in " +
                  "converting created_at to a UNIX timestamp.")
            print(str(e))
        # Sentiment analysis
        tweet.update(self.senti.analyse(tweet['text']))
        # Geocoding
        if tweet['coordinates'] is not None:
            try:
                tweet.update(self.get_geocode(tweet['geo']['coordinates']))
            except Exception:
                print("Warning: geocode() failed for tweet " + tweet['id_str'])
        # If related to an article, store a duplicate of the tweet in
        # the URLs database
        if 'url' in tweet['wa']:
            response = self.db_tweets_urls.store_tweet(tweet)
            if response is not None:
                print("Stored tweet: " + tweet['id_str'] +
                      " in URLs database.")
        # If the tweet is older than the configured number of days, store
        # it in the archive; otherwise store it in the live database
        oldest_time = int(time.time() -
                          60 * 60 * 24 * core.config('twitter', 'days'))
        if int(tweet['wa']['time']) < oldest_time:
            response = self.db_tweets_archive.store_tweet(tweet)
            if response is not None:
                print("Stored tweet " + tweet['id_str'] +
                      " in archive database.")
        else:
            response = self.db_tweets.store_tweet(tweet)
            if response is not None:
                print("Stored tweet " + tweet['id_str'] + " in database.")
        return response
Exemplo n.º 8
0
                    pass
        except:
            pass

    class TweetStreamListener(tweepy.StreamListener):
        """Stream listener that prints the text of each status it receives."""
        def on_status(self, status):
            """Print the text of *status*."""
            print(status.text)

    def stream_tweets(self):
        """Start streaming tweets located inside a fixed bounding box.

        A ``TweetStreamListener`` is attached to a new ``tweepy.Stream``
        that reuses the authentication of ``self.api``. The stream is
        filtered by location and started asynchronously, so this method
        returns without waiting for the stream to end.
        """
        listener = self.TweetStreamListener()
        melbourne_stream = tweepy.Stream(auth=self.api.auth,
                                         listener=listener)
        # `async` became a reserved word in Python 3.7, which makes
        # `async=True` a SyntaxError; tweepy 3.7.0 renamed it to `is_async`.
        melbourne_stream.filter(
            locations=[144.4441, -38.5030, 145.8176, -37.4018],
            is_async=True)


# Module-level driver: the harvester is constructed as soon as this module
# is executed.
th = TweetHarvester()
# Each loop below is guarded by `while False`, so its body never runs as
# written. NOTE(review): these look like hand-toggled switches for the
# individual harvesting jobs - confirm before removing.
while False:
    th.iterate_timelines()
    # Cut-off timestamp: now minus the configured number of days, as a string
    oldest_time = str(
        int(time.time() - 60 * 60 * 24 * core.config('twitter', 'days')))
    th.iterate_articles(oldest_time)
while False:
    th.iterate_replies()
while False:
    th.iterate_retweets()
while False:
    # Unlike the first loop, no cut-off time is passed here
    th.iterate_articles()

#tweets = th.db_tweets.get_topic_tweets('rugby')
Exemplo n.º 9
0
#!/usr/bin/python3
""""""

import sys
import os

sys.path.append(os.path.join(os.path.dirname(sys.path[0]), 'harvester'))

import core
from comms.couchdb import CouchDBComms
from comms.nectar import ObjectStore

# Load the whole configuration to check that the config file is complete.
# The couchdb section needs:
#   required: ip_address, port
#   optional: https, login, password
args = core.config()

obj_store = ObjectStore('wa-opengraph')

# Construct the CouchDB comms class; any failure propagates unchanged
print(":CouchDBComms: Attempting to construct class")
db = CouchDBComms('articles2')
print("Successfully created class")

#db.store_article({'url': 'http://www.google.com'})
Exemplo n.º 10
0
	print a message as an error

	strErr: message to print or user input
	errType: if equal to 'nr', prints that the user input specified above is not recognized

	no returns
	'''
	echo('-!- : ')
	if(errType == "nr"): #not recognized error
		echo(strErr + " is not recognized\n\n")
	elif(errType == ""):
		echo(strErr + "\n\n")


# make sure the data folder exists before the config is read from it
if not os.path.isdir('data'):
	os.mkdir('data')
# read the configuration stored in the data folder
conf = core.config('data/config.dat')
# progress variable <core.study.level class>, not set yet
progress = None

# main loop: keep handling commands while the exit code stays at zero
while exitCode == 0:
	initCmd()

# the loop ended normally, so persist the config file
conf.save()

echo("\nexited with code " + str(exitCode))
Exemplo n.º 11
0
def config(path):
    ''' Edit the config by delegating to core.config.

    path is passed to core.config() unchanged. The result of that call is
    discarded, so this function always returns None.
    '''
    core.config(path)