示例#1
0
def find_this_file(filename: str, stopWords: bool):
    """Return the processed data for a file, cached by the hash of its text.

    The text obtained from ``read_file(filename)`` is hashed with SHA-256 and
    the hex digest is used as the key of the document looked up in
    ``collections`` (presumably a pymongo collection -- confirm against the
    module setup). When no such document exists, the text is run through
    ``manipulate_data`` and the result is inserted under that key.

    :param filename: path handed to ``read_file``
    :param stopWords: when truthy, the data is passed through
        ``removeStopWords`` before being returned
    :return: the data stored under the digest, optionally filtered by
        ``removeStopWords``
    """
    text = read_file(filename)
    digest = hashlib.sha256(bytes(text, 'utf-8')).hexdigest()

    # Look for a document that has the digest as one of its keys; the
    # projection drops "_id" from the returned document.
    lookup = {digest: {'$exists': True}}
    record = collections.find_one(lookup, {'_id': False})

    if not record:
        # Cache miss: process the text and store it under the digest.
        record = {digest: manipulate_data(text)}
        collections.insert(record, check_keys=False)

    payload = record[digest]
    return removeStopWords(payload) if stopWords else payload
def get_input(inp):
    """Return the tweets (or text lines) selected by *inp*.

    *inp* is split on ":" and its first field picks the source:

    * ``"twitter:home"``   -> ``get_home_timeline()``
    * ``"twitter:none"``   -> ``get_tweets_of()`` with no argument
    * ``"twitter:<user>"`` -> ``get_tweets_of(<user>)``
    * anything else        -> *inp* is treated as a file path; the file is
      read with ``text_utils.read_file`` and split on newlines

    :param inp: source specifier string as described above
    :return: whatever the selected helper returns, or a list of lines
    :raises ValueError: if *inp* selects twitter mode but carries no
        ``:<user>`` part (this used to surface as an UnboundLocalError
        because a bare ``except`` hid the failed parse)
    """
    fields = inp.split(":")
    mode = fields[0]

    # Inputs that are not in twitter mode (including plain paths without any
    # ":") are file paths; the whole original string is the path.
    if mode != "twitter":
        return text_utils.read_file(inp).split("\n")

    if len(fields) < 2:
        raise ValueError(
            "twitter input must look like 'twitter:<user>', "
            "'twitter:home' or 'twitter:none'; got %r" % (inp,)
        )

    # Only the second field is the user; any further ":" fields are ignored,
    # as before.
    usr = fields[1]
    if usr == "home":
        return get_home_timeline()  # tweets from the twitter homepage
    if usr == "none":
        return get_tweets_of()  # tweets of the authorized api account
    return get_tweets_of(usr)  # tweets of the user with this id
def get_input(inp):
    """Return the tweets (or text lines) selected by *inp*.

    *inp* is split on ':' and its first field picks the source:

    * ``'twitter:home'``   -> ``get_home_timeline()``
    * ``'twitter:none'``   -> ``get_tweets_of()`` with no argument
    * ``'twitter:<user>'`` -> ``get_tweets_of(<user>)``
    * anything else        -> *inp* is treated as a file path; the file is
      read with ``text_utils.read_file`` and split on newlines

    :param inp: source specifier string as described above
    :return: whatever the selected helper returns, or a list of lines
    :raises ValueError: if *inp* selects twitter mode but carries no
        ``:<user>`` part (this used to surface as an UnboundLocalError
        because a bare ``except`` hid the failed parse)
    """
    fields = inp.split(':')
    mode = fields[0]

    # Inputs that are not in twitter mode (including plain paths without any
    # ':') are file paths; the whole original string is the path.
    if mode != 'twitter':
        return text_utils.read_file(inp).split('\n')

    if len(fields) < 2:
        raise ValueError(
            "twitter input must look like 'twitter:<user>', "
            "'twitter:home' or 'twitter:none'; got %r" % (inp,)
        )

    # Only the second field is the user; any further ':' fields are ignored,
    # as before.
    usr = fields[1]
    if usr == 'home':
        return get_home_timeline()  # tweets from the twitter homepage
    if usr == 'none':
        return get_tweets_of()  # tweets of the authorized api account
    return get_tweets_of(usr)  # tweets of the user with this id
示例#4
0
    def __init__(self, euro_dirname, sew_dirname, tom_dirname):
        """
        Set up a Sentence instance: dataset locations plus text filters.

        :param euro_dirname: path of parsed Eurosense sentences
        :param sew_dirname: path of parsed SEW sentences
        :param tom_dirname: path of parsed TOM sentences
        """
        # where each parsed dataset lives
        self.euro_dirname, self.sew_dirname, self.tom_dirname = (
            euro_dirname,
            sew_dirname,
            tom_dirname,
        )

        # matches HTML character entities such as "&amp;" or "&quot;"
        entity_regex = re.compile(r"&\w+;")
        self.pattern = entity_regex

        # stop words read from STOP_WORDS (about 600 per the original note),
        # kept as a set
        stop_word_entries = read_file(STOP_WORDS)
        self.cachedStopWords = set(stop_word_entries)

        # every character of string.punctuation, as a set
        punctuation_chars = string.punctuation
        self.punctuation = set(punctuation_chars)

        # a Porter stemmer is kept available; the original author notes that
        # stemming does not improve the score enough
        self.stemmer = nltk.stem.porter.PorterStemmer()
import tweepy
import json
import pprint
import time

import text_utils

# authorize and access Twitter API
# NOTE: these statements run at import time, so importing this module reads
# the credentials file and builds the tweepy client as a side effect.
# The path is relative and therefore resolved against the current working
# directory of the process.
path = "./essentials/twitter_credentials/access.json"
# presumably text_utils.read_file returns the file's text -- confirm
json_data = text_utils.read_file(path)
# the JSON document must provide the four keys read below
credentials = json.loads(json_data)
auth = tweepy.OAuthHandler(credentials["consumer_key"], credentials["consumer_secret"])
auth.set_access_token(credentials["access_token"], credentials["access_token_secret"])
# module-level client used by the timeline helper functions in this module
api = tweepy.API(auth)


def get_home_timeline():
    """Return the text of each status from ``api.home_timeline()`` as a list.

    The original note describes this as the top 20 tweets on your homepage;
    the count is whatever ``api.home_timeline()`` yields when called without
    arguments.
    """
    return [status.text for status in api.home_timeline()]


def get_tweets_of(user=None):
    # Fetches a timeline through api.user_timeline: called with no argument
    # when user is None, otherwise called with the given user.
    # original note: returns the top 20 tweets of any user given the user id
    # NOTE(review): the body visible here only assigns public_tweets and never
    # returns it -- the rest of the function appears to be cut off; confirm
    # against the full file.
    if user == None:
        public_tweets = api.user_timeline()
    else:
        public_tweets = api.user_timeline(user)
import tweepy
import json
import pprint
import time

import text_utils

# authorize and access Twitter API
# NOTE: these statements run at import time, so importing this module reads
# the credentials file and builds the tweepy client as a side effect.
# The path is relative and therefore resolved against the current working
# directory of the process.
path = './essentials/twitter_credentials/access.json'
# presumably text_utils.read_file returns the file's text -- confirm
json_data = text_utils.read_file(path)
# the JSON document must provide the four keys read below
credentials = json.loads(json_data)
auth = tweepy.OAuthHandler(credentials['consumer_key'],
                           credentials['consumer_secret'])
auth.set_access_token(credentials['access_token'],
                      credentials['access_token_secret'])
# module-level client used by the timeline helper functions in this module
api = tweepy.API(auth)


def get_home_timeline():
    """Return the text of each status from ``api.home_timeline()`` as a list.

    The original note describes this as the top 20 tweets on your homepage;
    the count is whatever ``api.home_timeline()`` yields when called without
    arguments.
    """
    return [status.text for status in api.home_timeline()]


def get_tweets_of(user=None):
    # returns the top 20 tweets of any user given the user id
    if user == None:
        public_tweets = api.user_timeline()