Example #1
    def __init__(self):
        # Cross-check every gathered tweet against the news data and keep
        # only the tweets that pass triangulation.
        json_data = {}
        get = gd.get_data()
        mod = md.modify_data()
        print('Triangulating tweets...')
        senators = get.senators()
        concerns = get.concerns()

        with open('raw/gathered_tweets.json', 'r') as json_file:
            data = json.load(json_file)

            for sen in senators:
                json_data[sen] = {}

                for con in concerns:
                    json_data[sen][con] = []

                    for item in data[sen][con]:
                        tweet = mod.remove_stopwords(item['tweet_text2'])

                        # Keep only tweets confirmed by triangulation
                        # against the gathered news headlines.
                        if self.triangulate(tweet, item['tweet_loc']):
                            json_data[sen][con].append(item)

        with open('clean/final_tweets.json', 'w') as json_file:
            json.dump(json_data, json_file, indent=4, sort_keys=True)
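The triangulate() helper this example depends on is not shown. A minimal sketch of one plausible implementation, assuming a tweet passes when its cleaned text (plus its location) shares vocabulary with the cleaned RSS headlines; the file name and overlap rule are assumptions, not the original logic:

    def triangulate(self, tweet, tweet_loc):
        # Hypothetical: compare the cleaned tweet against the cleaned news
        # headlines gathered for the same period (see the RSS examples below).
        try:
            with open('clean/clean_rss.txt', 'r') as rss_file:
                rss_words = set(rss_file.read().lower().split())
        except FileNotFoundError:
            return False
        tweet_words = set((tweet + ' ' + tweet_loc).lower().split())
        # Keep the tweet only if it overlaps with the news vocabulary.
        return bool(tweet_words & rss_words)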
Example #2
    def count_response(self, con_list):
        # Count tweets mentioning a concern (in any of its translations)
        # near each monitored city, de-duplicating by text and tweet id.
        get = gd.get_data()
        mod = md.modify_data()
        tso = ts.TwitterSearchOrder()
        tso.arguments.update({'tweet_mode': 'extended'})  # request untruncated text
        api = get.api()
        coordinates = get.coordinates()
        con_count = 0
        respo_list = []
        respo_loc = []

        for con in con_list:
            print('\tCounting ' + con + '...')
            tso.set_keywords([con])

            for coordinate in coordinates:
                # Restrict the search to the area around this city.
                tso.set_geocode(coordinate['lat'], coordinate['long'], 5, False)

                for tweet in api.search_tweets_iterable(tso, callback=self.avoid_rate_limit):
                    # Retweets truncate 'full_text'; prefer the original tweet's text.
                    try:
                        tweet_text = tweet['retweeted_status']['full_text']
                    except KeyError:
                        tweet_text = tweet['full_text']

                    cleaned_tweet = mod.clean_tweet(tweet_text)
                    temp_res = cleaned_tweet + ' --- ' + tweet['id_str']
                    if temp_res not in respo_list:
                        respo_list.append(temp_res)
                        respo_loc.append(coordinate['city'])
                        con_count += 1

        with open('raw/response.txt', 'a') as res:
            print('Total: ' + str(con_count))
            res.write(con_list[0] + ': ' + str(con_count) + '\n')
            for response, loc in zip(respo_list, respo_loc):
                res.write(response + ' (' + loc + ')\n')
            res.write('\n')

        return con_count
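Both search loops pass self.avoid_rate_limit as a callback. TwitterSearch invokes the callback with the live TwitterSearch instance on each query, which makes it a natural throttling point. A minimal sketch; the query threshold and sleep interval are assumptions:

    # assumes: import time (module level)
    def avoid_rate_limit(self, current_ts_instance):
        # get_statistics() returns (queries_sent, tweets_received).
        queries, tweets_seen = current_ts_instance.get_statistics()
        if queries > 0 and queries % 5 == 0:
            time.sleep(60)  # assumed pause after every fifth request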
Example #3
    def __init__(self):
        # Score the triangulated tweets per senator-concern pair with TextBlob
        # sentiment, weight each tweet with check_score(), and merge the
        # results into the cumulative copy pulled from the database.
        get = gd.get_data()
        mod = md.modify_data()
        dbs = dbase.access_db()
        json_data = {}

        # Fetch the cumulative scores file from the database and load it.
        dbs.get_file('tweet_scores_inf', 'DB/clean/tweet_scores_inf.json')
        with open('DB/clean/tweet_scores_inf.json', 'r') as json_file:
            dbs_data = json.load(json_file)

        with open('clean/final_tweets.json', 'r') as json_file:
            data = json.load(json_file)

            senators = get.senators()
            concerns = get.concerns()

            for sen in senators:
                for con in concerns:
                    json_data[sen + ' - ' + con] = []
                    total_tweets = len(data[sen][con])
                    pos = 0
                    neg = 0
                    neu = 0
                    pos_tweets = []
                    neg_tweets = []
                    neu_tweets = []

                    for item in data[sen][con]:
                        tweet = item['tweet_text2']
                        polarity = TextBlob(tweet).sentiment.polarity
                        score = self.check_score(item['user_verified'],
                                                 item['user_created'],
                                                 item['user_follower'],
                                                 item['is_retweet'])

                        # Classify with a +/-0.1 neutral band around zero.
                        if polarity >= 0.1:
                            pos += score
                            pos_tweets.append(tweet)
                            print('POSITIVE', polarity, tweet)
                        elif polarity <= -0.1:
                            neg += score
                            neg_tweets.append(tweet)
                            print('NEGATIVE', polarity, tweet)
                        else:
                            neu += score
                            neu_tweets.append(tweet)
                            print('NEUTRAL', polarity, tweet)

                        with open('common_words.txt', 'a') as common_words:
                            # Collect content words (nouns, verbs, adjectives)
                            # for the keyword summary printed below.
                            tweet = mod.translate(tweet)
                            tweet = mod.remove_stopwords(tweet)
                            tokens = nltk.word_tokenize(tweet)
                            tagged = pos_tag(tokens)
                            result = [(word, map_tag('en-ptb', 'universal', tag))
                                      for word, tag in tagged]

                            for word, tag in result:
                                if tag in ('NOUN', 'VERB', 'ADJ'):
                                    if word != sen and word not in con:
                                        common_words.write(word + ' ')

                    total = pos + neg + neu

                    json_data[sen + ' - ' + con].append({
                        'pos': pos, 'neg': neg, 'neu': neu, 'total': total, 'num_tweets': total_tweets,
                        'pos_tweets': pos_tweets, 'neg_tweets': neg_tweets, 'neu_tweets': neu_tweets
                    })
                    try:
                        for pt in pos_tweets:
                            dbs_data[sen + ' - ' + con][0]['pos_tweets'].append(pt)
                        for nt in neg_tweets:
                            dbs_data[sen + ' - ' + con][0]['neg_tweets'].append(nt)
                        for ut in neu_tweets:
                            dbs_data[sen + ' - ' + con][0]['neu_tweets'].append(ut)

                        dbs_data[sen + ' - ' + con][0]['pos'] += pos
                        dbs_data[sen + ' - ' + con][0]['neg'] += neg
                        dbs_data[sen + ' - ' + con][0]['neu'] += neu

                    except KeyError:
                        # Pair not yet present in the cumulative data: create
                        # it there instead of re-appending to this run's
                        # json_data, which already holds these results.
                        dbs_data[sen + ' - ' + con] = [{
                            'pos': pos, 'neg': neg, 'neu': neu, 'total': total, 'num_tweets': total_tweets,
                            'pos_tweets': pos_tweets, 'neg_tweets': neg_tweets, 'neu_tweets': neu_tweets
                        }]

                    if total != 0:
                        print(sen + ' - ' + con)
                        print('Positive: ' + str(round(pos/total*100, 2)) +
                              '%\nNegative: ' + str(round(neg/total*100, 2)) +
                              '%\nNeutral: ' + str(round(neu/total*100, 2)) + '%')

                        with open('common_words.txt') as cw_file:
                            words = re.findall(r'\w+', cw_file.read().lower())
                        count = Counter(words).most_common(3)
                        common = ' '.join(cnt[0] for cnt in count)
                        print('General Keywords: ' + common)
                        os.remove('common_words.txt')

                        print('From ' + str(total_tweets) + ' tweets.\n')

        with open('clean/tweet_scores.json', 'w') as json_file:
            json.dump(json_data, json_file, indent=4, sort_keys=True)

        with open('clean/tweet_scores_inf.json', 'w') as json_file:
            json.dump(dbs_data, json_file, indent=4, sort_keys=True)

        os.remove("DB/clean/tweet_scores_inf.json")
Example #4
    def __init__(self, week):
        # Gather headlines from Philippine news RSS feeds, de-duplicate
        # against previously seen titles, and store the raw and cleaned
        # lists as blobs in the policalc.db SQLite database.
        dbs = dbase.access_db()
        news_urls = {
            'gmanews1':
            'https://data.gmanews.tv/gno/rss/news/nation/feed.xml',
            'gmanews2':
            'https://data.gmanews.tv/gno/rss/news/regions/feed.xml',
            'gmanews3':
            'https://data.gmanews.tv/gno/rss/news/ulatfilipino/feed.xml',
            'gmanews4':
            'https://data.gmanews.tv/gno/rss/news/specialreports/feed.xml',
            'philstar1':
            'https://www.philstar.com/rss/headlines',
            'philstar2':
            'https://www.philstar.com/rss/nation',
            'philstar3':
            'https://www.philstar.com/rss/agriculture',
            'inquirer':
            'https://www.inquirer.net/fullfeed',
            'manilatimes':
            'https://www.manilatimes.net/feed/',
            'businessworld':
            'http://www.bworldonline.com/feed/',
            'eaglenews':
            'https://www.eaglenews.ph/feed/',
            'sunstarDav':
            'https://www.sunstar.com.ph/rssFeed/67/29',
            'sunstarDav2':
            'https://www.sunstar.com.ph/rssFeed/67',
            'sunstarMnl':
            'https://www.sunstar.com.ph/rssFeed/70',
            'sunstarMnl2':
            'https://www.sunstar.com.ph/rssFeed/70/50',
            'sunstarZam':
            'https://www.sunstar.com.ph/rssFeed/76',
            'sunstarZam2':
            'https://www.sunstar.com.ph/rssFeed/76/78',
            'sunstarCeb':
            'https://www.sunstar.com.ph/rssFeed/63/1',
            'sunstarCeb2':
            'https://www.sunstar.com.ph/rssFeed/63',
            'sunstar1':
            'https://www.sunstar.com.ph/rssFeed/81',
            'sunstar2':
            'https://www.sunstar.com.ph/rssFeed/81/97',
            'sunstar3':
            'https://www.sunstar.com.ph/rssFeed/selected',
            'businessmirror':
            'https://businessmirror.com.ph/feed/',
            'PhilNewAgency':
            'https://www.feedspot.com/infiniterss.php?q=site:http%3A%2F%2Fwww.pna.gov.ph%2Flatest.rss',
            'interaksyon':
            'https://www.feedspot.com/infiniterss.php?q=site:http%3A%2F%2Fwww.interaksyon.com%2Ffeed'
        }

        print('Gathering rss feed on news sources...')
        mod = md.modify_data()
        raw_rss = []

        if week == 'same_week':
            # Continuing the same week: pull the stored files first so
            # already-gathered headlines are skipped.
            try:
                dbs.get_file('raw_rss', 'raw/raw_rss.txt')
                dbs.get_file('clean_rss', 'clean/clean_rss.txt')
                with open('raw/raw_rss.txt', 'r') as raw_file:
                    for raw in raw_file:
                        raw = raw.split('\n')[0]
                        raw_rss.append(raw)
            except FileNotFoundError:
                pass

        for url in news_urls.values():
            feed = feedparser.parse(url)

            for newsitem in feed['items']:
                # Drop non-ASCII characters from the headline.
                news = newsitem.title.encode('ascii', 'ignore').decode('utf-8')

                if news not in raw_rss:
                    raw_rss.append(news)

                    with open('raw/raw_rss.txt', 'a') as raw_file:
                        raw = news + '\n'
                        raw_file.write(raw)

                    news2 = mod.translate(news)
                    news2 = mod.remove_stopwords(news2)

                    with open('clean/clean_rss.txt', 'a') as clean_file:
                        clean = news2 + '\n'
                        clean_file.write(clean)

        print('Saved raw rss data to "raw_rss.txt"...')
        print('Saved clean rss data to "clean_rss.txt"...')
        print('Finished gathering rss data...')

        conn = sqlite3.connect('policalc.db')
        db_con = conn.cursor()

        with open('raw/raw_rss.txt', 'rb') as file:
            blob_file = file.read()
            db_con.execute(
                "INSERT INTO raw_rss VALUES (:id, :date, :file)",
                {
                    'id': None,  # let SQLite assign the next rowid
                    'date': dt.now(),
                    'file': blob_file
                })
            conn.commit()

        with open('clean/clean_rss.txt', 'rb') as file:
            blob_file2 = file.read()
            db_con.execute(
                "INSERT INTO clean_rss VALUES (:id, :date, :file)",
                {
                    'id': None,
                    'date': dt.now(),
                    'file': blob_file2
                })
            conn.commit()

        conn.close()

        os.remove('raw/raw_rss.txt')
        os.remove('clean/clean_rss.txt')
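The two INSERT statements assume raw_rss and clean_rss tables with an autoincrement id, a date column, and a blob column. A schema sketch consistent with that usage (the real schema is not shown in the example):

    import sqlite3

    conn = sqlite3.connect('policalc.db')
    cur = conn.cursor()
    for table in ('raw_rss', 'clean_rss'):
        # Passing None for :id above lets SQLite assign the next rowid.
        cur.execute('CREATE TABLE IF NOT EXISTS {} ('
                    'id INTEGER PRIMARY KEY AUTOINCREMENT, '
                    'date TEXT, '
                    'file BLOB)'.format(table))
    conn.commit()
    conn.close()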
Example #5
    def __init__(self):
        # Search Twitter for tweets that mention a senator together with a
        # concern (in English, Tagalog, or Cebuano) near each monitored city,
        # recording tweet and author metadata for later scoring.
        print('Gathering tweets with political context...')
        get = gd.get_data()
        mod = md.modify_data()
        api = get.api()
        tso = ts.TwitterSearchOrder()
        tso.arguments.update({'tweet_mode': 'extended'})
        res_list = []
        res_dict = {}
        json_data = {}
        senators = get.senators()
        concerns = get.concerns()
        coordinates = get.coordinates()

        for senator in senators:
            json_data[senator] = {}
            print('Gathering tweets mentioning ' + senator + '...')

            for concern in concerns:
                json_data[senator][concern] = []
                # A concern is stored as 'english, tagalog[, cebuano]';
                # search for every available translation. This also avoids
                # the crash the old try/except hit when a concern had no
                # Tagalog entry.
                con_list = [c.strip() for c in concern.split(',')]
                print('\t' + concern + '...')

                for con_item in con_list:
                    tso.set_keywords([senator, con_item])

                    for coordinate in coordinates:
                        tso.set_geocode(coordinate['lat'], coordinate['long'], 5, False)

                        for tweet in api.search_tweets_iterable(tso, callback=self.avoid_rate_limit):
                            # Retweets truncate 'full_text'; take the original
                            # tweet's text and remember that it was a retweet.
                            try:
                                tweet_text = tweet['retweeted_status']['full_text']
                                is_retweet = True
                            except KeyError:
                                tweet_text = tweet['full_text']
                                is_retweet = False

                            res_text = tweet['id_str'] + ': ' + tweet_text
                            if res_text not in res_list:
                                res_list.append(res_text)

                                if tweet['is_quote_status']:
                                    if is_retweet:
                                        quote_text = tweet['retweeted_status']['quoted_status']['full_text']
                                    else:
                                        quote_text = tweet['quoted_status']['full_text']
                                else:
                                    quote_text = None

                                tweet_text2 = mod.clean_tweet(tweet_text)
                                tweet_text2 = mod.translate(tweet_text2)

                                if tweet_text2 is None:
                                    continue

                                if quote_text is not None:
                                    quote_text2 = mod.clean_tweet(quote_text)
                                    quote_text2 = mod.translate(quote_text2)
                                else:
                                    quote_text2 = None

                                json_data[senator][concern].append({
                                    'tweet_text': tweet_text,
                                    'tweet_text2': tweet_text2,
                                    'is_retweet': is_retweet,
                                    'quote_text': quote_text,
                                    'quote_text2': quote_text2,
                                    'tweet_id': tweet['id'],
                                    'rt_count': tweet['retweet_count'],
                                    'tweet_created': tweet['created_at'],
                                    'tweet_loc': coordinate['city'],
                                    'user_id': tweet['user']['id'],
                                    'user_created': tweet['user']['created_at'],
                                    'user_verified': tweet['user']['verified'],
                                    'user_follower': tweet['user']['followers_count'],
                                    'user_total_tweet': tweet['user']['statuses_count'],
                                    'user_loc': tweet['user']['location']
                                })

                                res_tweet = mod.remove_stopwords(tweet_text2)
                                if quote_text2 is not None:
                                    res_dict = self.initialize_triangulation(
                                        res_dict, res_tweet + ' ' + quote_text2 + ' ' + coordinate['city'])
                                else:
                                    res_dict = self.initialize_triangulation(
                                        res_dict, res_tweet + ' ' + coordinate['city'])

        print('Saving collected tweets into "gathered_tweets.json"...')
        self.save_tweet(json_data)
        self.save_cleaned_tweet(res_dict)
        print('Finished gathering tweets with political context...')
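save_tweet() and save_cleaned_tweet() are not shown. Sketches consistent with the rest of the pipeline, assuming save_tweet() writes the 'raw/gathered_tweets.json' file that the triangulation step in Example #1 reads; the cleaned-output file name is an assumption:

    # assumes: import json (module level)
    def save_tweet(self, json_data):
        # Written here, consumed by the triangulation step in Example #1.
        with open('raw/gathered_tweets.json', 'w') as json_file:
            json.dump(json_data, json_file, indent=4, sort_keys=True)

    def save_cleaned_tweet(self, res_dict):
        # Hypothetical file name for the cleaned, stopword-free text.
        with open('raw/cleaned_tweets.json', 'w') as json_file:
            json.dump(res_dict, json_file, indent=4, sort_keys=True)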
Example #6
    def __init__(self):
        # Same RSS gathering as Example #4, but without the weekly database
        # sync: headlines are de-duplicated only against the local raw_rss.txt.
        news_urls = {
            'gmanews1':
            'https://data.gmanews.tv/gno/rss/news/nation/feed.xml',
            'gmanews2':
            'https://data.gmanews.tv/gno/rss/news/regions/feed.xml',
            'gmanews3':
            'https://data.gmanews.tv/gno/rss/news/ulatfilipino/feed.xml',
            'gmanews4':
            'https://data.gmanews.tv/gno/rss/news/specialreports/feed.xml',
            'philstar1':
            'https://www.philstar.com/rss/headlines',
            'philstar2':
            'https://www.philstar.com/rss/nation',
            'philstar3':
            'https://www.philstar.com/rss/agriculture',
            'inquirer':
            'https://www.inquirer.net/fullfeed',
            'manilatimes':
            'https://www.manilatimes.net/feed/',
            'businessworld':
            'http://www.bworldonline.com/feed/',
            'eaglenews':
            'https://www.eaglenews.ph/feed/',
            'sunstarDav':
            'https://www.sunstar.com.ph/rssFeed/67/29',
            'sunstarDav2':
            'https://www.sunstar.com.ph/rssFeed/67',
            'sunstarMnl':
            'https://www.sunstar.com.ph/rssFeed/70',
            'sunstarMnl2':
            'https://www.sunstar.com.ph/rssFeed/70/50',
            'sunstarZam':
            'https://www.sunstar.com.ph/rssFeed/76',
            'sunstarZam2':
            'https://www.sunstar.com.ph/rssFeed/76/78',
            'sunstarCeb':
            'https://www.sunstar.com.ph/rssFeed/63/1',
            'sunstarCeb2':
            'https://www.sunstar.com.ph/rssFeed/63',
            'sunstar1':
            'https://www.sunstar.com.ph/rssFeed/81',
            'sunstar2':
            'https://www.sunstar.com.ph/rssFeed/81/97',
            'sunstar3':
            'https://www.sunstar.com.ph/rssFeed/selected',
            'businessmirror':
            'https://businessmirror.com.ph/feed/',
            'PhilNewAgency':
            'https://www.feedspot.com/infiniterss.php?q=site:http%3A%2F%2Fwww.pna.gov.ph%2Flatest.rss',
            'interaksyon':
            'https://www.feedspot.com/infiniterss.php?q=site:http%3A%2F%2Fwww.interaksyon.com%2Ffeed'
        }

        print('Gathering rss feed on news sources...')
        mod = md.modify_data()
        raw_rss = []

        # Skip headlines already gathered in a previous run.
        try:
            with open('raw/raw_rss.txt', 'r') as raw_file:
                for raw in raw_file:
                    raw = raw.split('\n')[0]
                    raw_rss.append(raw)
        except FileNotFoundError:
            pass

        for url in news_urls.values():
            feed = feedparser.parse(url)

            for newsitem in feed['items']:
                # Drop non-ASCII characters from the headline.
                news = newsitem.title.encode('ascii', 'ignore').decode('utf-8')

                if news not in raw_rss:
                    raw_rss.append(news)

                    with open('raw/raw_rss.txt', 'a') as raw_file:
                        raw = news + '\n'
                        raw_file.write(raw)

                    news2 = mod.translate(news)
                    news2 = mod.remove_stopwords(news2)

                    with open('clean/clean_rss.txt', 'a') as clean_file:
                        clean = news2 + '\n'
                        clean_file.write(clean)

        print('Saved raw rss data to "raw_rss.txt"...')
        print('Saved clean rss data to "clean_rss.txt"...')
        print('Finished gathering rss data...')
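remove_stopwords() from modify_data is used throughout these examples. A minimal sketch assuming NLTK's English stopword list; the original may also handle Filipino stopwords:

    # assumes: from nltk.corpus import stopwords (module level)
    def remove_stopwords(self, text):
        stop_words = set(stopwords.words('english'))
        kept = [word for word in text.split() if word.lower() not in stop_words]
        return ' '.join(kept)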