Пример #1
0
def get_data(keyword, token, token_secret, no_tweets=300, remove_urls=True):
    """Fetch tweets for ``keyword``, classify them and return the data set.

    Progress messages and the total elapsed time are printed to stdout.

    Args:
        keyword: Search query forwarded to ``search_twitter``.
        token: User access token forwarded to ``search_twitter``.
        token_secret: User access token secret forwarded to
            ``search_twitter``.
        no_tweets: Tweet count forwarded to ``search_twitter``.
        remove_urls: When truthy, the retrieved tweets are passed through
            ``removeTweetsWithUrl`` before classification.

    Returns:
        The value returned by ``post_dataset`` for the classified tweets.
    """
    # NOTE: single-argument parenthesised prints behave identically as a
    # Python 2 print statement and as the Python 3 print function.
    started_at = datetime.now()
    clf = load_classifier()

    print('Retrieving tweets...')
    results = search_twitter(keyword, no_tweets, token, token_secret)
    print('Retrieved ' + str(len(results)) + ' Tweets')

    if remove_urls:
        print('Removing Tweets containing URL...')
        # The helper's return value used to be thrown away here, although
        # other call sites use it as the filtered collection. Keep it when
        # one is returned; if the helper filters in place and returns None,
        # carry on with the original object.
        filtered = removeTweetsWithUrl(results)
        if filtered is not None:
            results = filtered

    print('Creating pandas Dataframe and classifying tweets...')
    DataSet = toDataFrame(results, clf)

    DataSet = post_dataset(DataSet)

    print('\n ' + str(len(DataSet)))
    print('Total time taken to complete...' + str(datetime.now() - started_at))
    return DataSet
Пример #2
0
	def start(self):
		"""Run a Twitter search for any of ``self.words``.

		The words are joined with ``' OR '`` into the ``q`` parameter, ``rpp``
		is set to 100, and the encoded query string (prefixed with ``?``) is
		handed to ``search_twitter`` together with ``self.feed`` and
		``maxpages=20``.
		"""
		import urllib
		joined_words = ' OR '.join(self.words)
		encoded_params = urllib.urlencode({'q': joined_words, 'rpp': 100})
		search_twitter('?' + encoded_params, self.feed, maxpages=20)
Пример #3
0
def getresult():
    """Handle a search submission: fetch, classify and render the tweets.

    Search options are read from ``request.form`` and the user's Twitter
    access keys from ``session['twitter_token']``. The tweets are retrieved,
    optionally filtered for URLs, classified, post-processed and rendered
    through ``result.html``. Progress messages are printed to stdout.

    Returns:
        The response built from ``result.html`` on success. If any step
        raises, the exception is printed and ``error.html`` is rendered
        with it instead.
    """
    # NOTE: single-argument parenthesised prints behave identically as a
    # Python 2 print statement and as the Python 3 print function.
    checked_url = False
    try:
        # RECEIVE DATA FROM INDEX WEB PAGE
        if 'noTweets' in request.form:
            # Submitted form values are strings; convert so the count has
            # the same type as the integer default below. A non-numeric
            # value raises ValueError and ends up on the error page.
            no_tweets = int(request.form['noTweets'])
        else:
            # Used to determine if the initial form or the advanced options
            # were used; the initial form always removes tweets with URLs.
            checked_url = True
            no_tweets = 50

        if 'selClf' in request.form:
            name_clf = request.form['selClf']
        else:
            name_clf = 'Maximum Enthropy'

        if 'advQuery' in request.form:
            query = request.form['advQuery']
        else:
            query = request.form['search']

        if 'url' in request.form:
            checked_url = True

        # GET USER ACCESS KEYS
        token = session['twitter_token'][0]
        token_secret = session['twitter_token'][1]

        # GET TWEETS DATA FRAME BASED ON SEARCH QUERY
        print('Loading ' + name_clf + ' classifier...')
        clf = act.load_classifier(classifier=name_clf)
        print("Classifier completely loaded")

        print('Connecting to Twitter...')
        print('Getting ' + str(no_tweets) + ' tweets')
        twitter_result = search_twitter(query, no_tweets=no_tweets, token=token, token_secret=token_secret)
        # Report how many tweets actually came back rather than echoing the
        # requested count (the old message also lacked the separating space).
        print(str(len(twitter_result)) + ' Tweets Retrieved Successfully!')

        if checked_url:
            print('Removing Tweets containing URL...')
            twitter_result = act.removeTweetsWithUrl(twitter_result)
            print('Removed tweets containing URL')

        print('Trying to create a Pandas DataFrame...')
        print('Classifying Tweets...')
        data = act.toDataFrame(twitter_result, clf)
        print('Tweets classified Correctly!')

        print('Finishing up the DataSet')
        data = act.post_dataset(data)
        print('DONE with dataset')

        script_bar, div_bar, all_data, piedata, freq, no_positive, no_negative = performComputation(data)

        top_10_tweets = get_top_tweets(all_data)

        resp = make_response(
            render_template('result.html', tot_tweets=no_tweets, no_positive=no_positive, no_negative=no_negative,
                            div_bar=div_bar, script_bar=script_bar, piedata=piedata, freq=freq, topTweets=top_10_tweets,
                            full_data=all_data, pd=pd))

        return resp
    except Exception as e:
        # View-level boundary: show the failure to the user instead of a 500.
        print(e)
        return render_template('error.html', error=e)