Python Tweet.mentions примеры использования

Язык программирования: Python

Пространство имен/Пакет: models.tweet

Класс/Тип: Tweet

Метод/Функция: mentions

Примеров на hotexamples.com: 3

Найдено 3 примера использования Python Tweet.mentions. Это лучшие примеры кода на Python для models.tweet.Tweet.mentions, взятые из проектов с открытым исходным кодом. Вы можете оценить каждый пример, чтобы помочь нам улучшить качество подборки.

Основные методы

Показать Скрыть

Tweet(30)

find(11)

check_token(10)

item(4)

all(4)

check_id(4)

factor(3)

old_item(3)

load(3)

action_number(3)

new(3)

find_all(3)

inverse(3)

hashtags(2)

load_all_tweets(2)

full_text(2)

init(2)

mentions(2)

creation_date(2)

created_at(2)

author(2)

objects(2)

load_tweets_by_user_id(1)

load_tweet_by_id(1)

new_item(1)

parent(1)

permission_denied(1)

mid(1)

get(1)

is_event(1)

infected_or_was_infected_by(1)

hasImage(1)

get_all(1)

geo(1)

double(1)

dico(1)

deleted_last_seen(1)

country(1)

authorId(1)

as_json(1)

add_tweet(1)

add_edit_tweet(1)

add(1)

remove(1)

Пример #1

Показать файл

Файл: csv2mongo.py Проект: clemsos/mitras

def extract_and_store_tweets(csvfile, nlp, minetweet):
    print
    print "Start processing %s ..." % csvfile
    print "*" * 20

    start = time()  # measure time

    # LOGGING
    tweets_count = 0
    mentions_count = 0
    urls_count = 0
    hashtags_count = 0
    tags_count = 0
    unvalid_tweets = 0

    i = 1  # iteroator to remember row number on csv
    with open(csvfile, 'r') as f:

        # print 'Processing data...'
        next(f)  # skip csv header
        data = csv.reader(f)

        # one row at a time
        for row in data:

            # create Tweet object
            t = Tweet()

            # Populate Tweet
            t.mid = row[0]
            t.retweetFromPostId = row[1]
            t.userId = row[2]
            t.retweetFromUserId = row[3]
            t.source = row[4]
            t.hasImage = row[5]
            t.txt = row[6]
            t.geo = row[7]
            t.created_at = row[8]
            t.deleted_last_seen = row[9]
            t.permission_denied = row[10]

            # Extract tweet entities
            mentions, urls, hashtags, clean = minetweet.extract_tweet_entities(
                t.txt)

            # add to Tweet
            t.mentions = mentions
            t.urls = urls
            t.hashtags = hashtags
            clean = clean  # text-only version of the tweet for NLP

            # Extract keywords
            dico = nlp.extract_dictionary(clean)

            # remove stopwords and store clean dico
            t.dico = nlp.remove_stopwords(dico)

            # extract entities
            # TODO : ignore stopwords
            # t.entities=nlp.extract_named_entities_from_dico(t.dico)

            # Some count for stats
            mentions_count += len(mentions)
            urls_count += len(urls)
            hashtags_count += len(hashtags)
            tags_count += len(t.entities)

            t.row = i

            valid_utf8 = True
            try:
                t.txt.decode('utf-8')
            except UnicodeDecodeError:
                unvalid_tweets += 1
                valid_utf8 = False
                print ' bad encoding : tweet ', t.mid
                # pprint(t)

            if valid_utf8 is True:
                try:
                    t.save()
                    tweets_count += 1
                except bson.errors.InvalidStringData:
                    print ' bad encoding : tweet ', t.mid
                    # pprint(t)

    # LOG
    print
    print "-" * 10
    print " mentions_count            : %d " % mentions_count
    print " urls_count                : %d " % urls_count
    print " hashtags_count            : %d " % hashtags_count
    print " unvalid tweets            : %d " % unvalid_tweets
    print " TOTAL tweet entities      : %d " % (mentions_count + urls_count +
                                                hashtags_count)
    print " TOTAL named entities (NER): %d " % tags_count
    print
    print "-" * 10
    print "TOTAL tweets processed    : %d" % tweets_count
    print " done in %.3fs" % (time() - start)
    print

Пример #2

Показать файл

Файл: csv2mongo.py Проект: caobaiyue/mitras

def extract_and_store_tweets(csvfile,nlp,minetweet):
    print
    print "Start processing %s ..."%csvfile
    print "*"*20

    start=time() # measure time

    # LOGGING
    tweets_count=0
    mentions_count=0
    urls_count=0
    hashtags_count=0
    tags_count=0
    unvalid_tweets=0


    i=1 # iteroator to remember row number on csv
    with open(csvfile, 'r') as f:

        # print 'Processing data...'
        next(f) # skip csv header
        data = csv.reader(f)

        # one row at a time
        for row in data: 

            # create Tweet object
            t=Tweet()

            # Populate Tweet
            t.mid=row[0]
            t.retweetFromPostId=row[1]
            t.userId=row[2]
            t.retweetFromUserId=row[3]
            t.source=row[4]
            t.hasImage=row[5]
            t.txt=row[6]
            t.geo=row[7]
            t.created_at=row[8]
            t.deleted_last_seen=row[9]
            t.permission_denied=row[10]

            # Extract tweet entities
            mentions,urls,hashtags,clean=minetweet.extract_tweet_entities(t.txt)
            
            # add to Tweet
            t.mentions=mentions
            t.urls=urls
            t.hashtags=hashtags
            clean=clean # text-only version of the tweet for NLP

            # Extract keywords
            dico=nlp.extract_dictionary(clean)

            # remove stopwords and store clean dico
            t.dico=nlp.remove_stopwords(dico)

            # extract entities
            # TODO : ignore stopwords
            # t.entities=nlp.extract_named_entities_from_dico(t.dico)
            
            # Some count for stats
            mentions_count+=len(mentions)
            urls_count+=len(urls)
            hashtags_count+=len(hashtags)
            tags_count+=len(t.entities)

            t.row=i

            valid_utf8 = True
            try:
                t.txt.decode('utf-8')
            except UnicodeDecodeError:
                unvalid_tweets+=1
                valid_utf8 = False
                print ' bad encoding : tweet ',t.mid
                # pprint(t)
            
            if valid_utf8 is True:
                try:
                    t.save()
                    tweets_count+=1
                except bson.errors.InvalidStringData:
                    print ' bad encoding : tweet ',t.mid
                    # pprint(t)

    # LOG
    print
    print "-"*10
    print " mentions_count            : %d "%mentions_count
    print " urls_count                : %d "%urls_count
    print " hashtags_count            : %d "%hashtags_count
    print " unvalid tweets            : %d "%unvalid_tweets
    print " TOTAL tweet entities      : %d "%(mentions_count+urls_count+hashtags_count)
    print " TOTAL named entities (NER): %d "%tags_count
    print
    print "-"*10
    print "TOTAL tweets processed    : %d"%tweets_count
    print " done in %.3fs"%(time()-start)
    print

Пример #3

Показать файл

Файл: script.py Проект: MCFreddie777/pdt-import

def save_tweet(obj):
    """
    Build a Tweet (with author, mentions, country and hashtags) from a
    decoded tweet dict and add it to the session.

    A tweet whose id is already in ``tweets_map`` is skipped. When the dict
    carries a ``retweeted_status``, that tweet is saved first (recursively)
    and linked as the parent of the new one. Accounts, countries and
    hashtags are created the first time they are seen and looked up through
    ``session`` afterwards, using the module-level ``*_map`` dicts to tell
    the two cases apart.

    :param obj: dict describing one tweet; the keys read here are
        ``id_str``, ``full_text``, ``coordinates``, ``retweet_count``,
        ``favorite_count``, ``created_at``, ``user``, ``entities``,
        ``place`` and, optionally, ``retweeted_status``
    :return: the newly created Tweet, or None when the id was already seen
    """
    tweet_id = obj['id_str']
    if tweet_id in tweets_map:
        # NOTE(review): None is also what the recursive call below receives
        # for an already-seen original, so such a retweet gets no parent
        # link - confirm this is intended.
        return None

    # dive into recursion until we hit the original tweet
    parent_tweet = None
    if 'retweeted_status' in obj and obj['retweeted_status'] is not None:
        parent_tweet = save_tweet(obj['retweeted_status'])

    location = None
    if obj['coordinates'] and obj['coordinates']['coordinates']:
        point = obj['coordinates']['coordinates']
        location = WKTElement(f"POINT({point[0]} {point[1]})", srid=4326)

    tweet = Tweet(id=obj['id_str'],
                  content=obj['full_text'],
                  location=location,
                  retweet_count=obj['retweet_count'],
                  favorite_count=obj['favorite_count'],
                  happened_at=obj['created_at'])

    tweets_map[tweet_id] = True

    # Author of the tweet; stays None when the tweet carries no user.
    author = None
    if obj['user'] is not None:
        user = obj['user']
        user_id = user['id']

        if user_id not in accounts_map:
            # first time we see this user: create the full account
            author = Account(
                id=user['id'],
                screen_name=user['screen_name'],
                name=user['name'],
                description=user['description'],
                followers_count=user['followers_count'],
                friends_count=user['friends_count'],
                statuses_count=user['statuses_count'])
            accounts_map[author.id] = SavedAccountType.FULL
        else:
            # find user in database
            author = session.query(Account).filter(
                Account.id == user_id).scalar()

            # user was previously saved as user_mention and needs to be
            # updated with the attributes user_mentions do not carry
            if accounts_map[user_id] == SavedAccountType.MENTION:
                author.update(user)
                accounts_map[author.id] = SavedAccountType.FULL

        # add user as an author of the tweet
        tweet.author = author

    # The original compared against tweet.author.id unconditionally, which
    # fails when the tweet has no user; resolve the id once, safely.
    author_id = author.id if author is not None else None

    # user mentions
    entities = obj['entities']
    if (entities is not None
            and entities['user_mentions'] is not None
            and len(entities['user_mentions'])):
        mentions = []

        for mentioned_user in entities['user_mentions']:
            user_id = mentioned_user['id']

            # Skip a user mentioned several times in the same status (the
            # account is already in the hashmap but not saved to the db
            # yet) and a user mentioning himself (the author is saved
            # together with the tweet at the end of this function).
            if (any(m.id == user_id for m in mentions)
                    or (author_id is not None and user_id == author_id)):
                continue

            # check whether the mention wasn't previously saved in hashmap
            if user_id not in accounts_map:
                account = Account(
                    id=mentioned_user['id'],
                    screen_name=mentioned_user['screen_name'],
                    # user-mention entities carry the display name at the
                    # top level; there is no nested 'user' object
                    name=mentioned_user['name'],
                )
                accounts_map[user_id] = SavedAccountType.MENTION
            else:
                # find user in database
                account = session.query(Account).filter(
                    Account.id == user_id).scalar()

            mentions.append(account)

        # associate mentions array with tweet
        tweet.mentions = mentions

    # if place is present in tweet and has all fields present
    place = obj['place']
    if (place is not None and place['country_code']
            and place['country']):
        country_code = place['country_code']
        if country_code not in countries_map:
            # first time we see this country: create it
            country = Country(code=place['country_code'],
                              name=place['country'])
            countries_map[country.code] = True
        else:
            # find country in database
            country = session.query(Country).filter(
                Country.code == country_code).scalar()

        # add place as a country of the tweet
        tweet.country = country

    if (entities is not None
            and entities['hashtags'] is not None
            and len(entities['hashtags'])):
        hashtags = []

        for hashtag_obj in entities['hashtags']:
            hashtag_id = hashtag_obj['text']

            # hashtag repeated within the current tweet: already in the
            # hashmap but not saved to the db yet
            if any(h.value == hashtag_id for h in hashtags):
                continue

            if hashtag_id not in hashtags_map:
                hashtags_map[hashtag_id] = True
                hashtag = Hashtag(hashtag_obj['text'])
            else:
                # find hashtag in database
                hashtag = session.query(Hashtag).filter(
                    Hashtag.value == hashtag_id).scalar()

            hashtags.append(hashtag)

        # associate hashtags array with tweet
        tweet.hashtags = hashtags

    # set the parent tweet from the recursion
    if parent_tweet:
        tweet.parent = parent_tweet

    # save tweet object into the db
    session.add(tweet)

    return tweet