Example #1
def trends():
    full_db = load_db(database_path=DATABASE_PATH)

    db_init_timestamp = str_2_datetime(full_db['trends']['include_hashtags']['initial_timestamp'],
                                       input_format=time_format_full_with_timezone)

    db_update_timestamp = str_2_datetime(full_db['trends']['include_hashtags']['timestamp'],
                                         input_format=time_format_full_with_timezone)


    print("time since app start: {:.2f} minutes".format((time.time() - start_time) / 60))
    print("time since database init: {}".format(
        (datetime.datetime.now(tz=pytz.utc) - db_init_timestamp)))
    print("time since last update: {:.2f} minutes".format(
        (datetime.datetime.now(tz=pytz.utc) - db_update_timestamp).seconds / 60))
    print('\ndebug:')
    print('time now: {}'.format(datetime.datetime.now(tz=pytz.utc)))
    print('db init time: {}'.format(db_init_timestamp))
    print('diff: {}'.format(datetime.datetime.now(tz=pytz.utc) - db_init_timestamp))

    trends_output = {
        "results": full_db['trends']['include_hashtags'],
        "status": 'ok'
    }

    return jsonify(trends_output)
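These views lean on helpers and format constants that the examples never show: str_2_datetime, datetime_2_str, and the time_format_* strings. A minimal sketch of what they plausibly look like, assuming thin wrappers around strptime/strftime (the format values below are reconstructed guesses, not taken from the source):

import datetime

# assumed format strings; the real values live in the source module
time_format_full_with_timezone = '%Y-%m-%d %H:%M:%S%z'
time_format_full_no_timezone = '%Y-%m-%d %H:%M:%S'

def str_2_datetime(time_str, input_format=time_format_full_with_timezone):
    # parse a stored timestamp string back into a datetime
    return datetime.datetime.strptime(time_str, input_format)

def datetime_2_str(time_dt, output_format=time_format_full_with_timezone):
    # serialize a datetime for storage in the JSON db
    return time_dt.strftime(output_format)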
Example #2
def all():
    full_db = load_db(database_path=DATABASE_PATH)

    db_init_timestamp = str_2_datetime(full_db['trends']['include_hashtags']['initial_timestamp'], input_format=time_format_full_with_timezone)
    db_update_timestamp = str_2_datetime(full_db['trends']['include_hashtags']['timestamp'], input_format=time_format_full_with_timezone)

    print("time since app start: {:.2f} minutes".format((time.time() - start_time) / 60))
    print("time since database init: {:.2f} hours".format((datetime.datetime.now(tz=pytz.utc) - db_init_timestamp).seconds/3600))
    print("time since last update: {:.2f} minutes".format((datetime.datetime.now(tz=pytz.utc) - db_update_timestamp).seconds/60))

    return jsonify(full_db)
Example #3
def get_top_hashtags_from_twitter(country='Japan', debug=False, cache_duration_mins=15, append_db=True):
    cache_db = load_db(database_path=db_path, debug=False)
    hashtags_cache = cache_db['hashtags']

    # compare db and now
    db_timestamp = str_2_datetime(hashtags_cache['timestamp'], input_format=time_format_full_with_timezone)
    db_timestamp = db_timestamp.astimezone(tz=pytz.utc)

    rq_timestamp = datetime.datetime.now(tz=pytz.utc)

    time_diff = rq_timestamp - db_timestamp
    print('time since last hashtags API call: {}'.format(time_diff))
    # total_seconds() covers gaps longer than a day; .seconds would wrap around
    if time_diff.total_seconds() < cache_duration_mins * 60:
        # cache still fresh: serve the stored content
        output_json = json.dumps(hashtags_cache['content'], ensure_ascii=False)
        return output_json
    else:
        output_json = get_top_hashtags_from_twitter_api(country=country, debug=debug)
        # cache expired: parse the fresh API response and update the database
        output_list = json.loads(output_json)

        if append_db:
            output_list = hashtags_cache['content'] + output_list

        cache_db['hashtags']['content'] = output_list
        cache_db['hashtags']['timestamp'] = datetime_2_str(rq_timestamp, output_format=time_format_full_with_timezone)

        update_db(cache_db, database_path=db_path, debug=debug)
        return output_json
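load_db and update_db are also never shown. Given that content and timestamps are stored as plain values and a database_path is passed around, they are plausibly JSON-file helpers; a hypothetical sketch:

import json

def load_db(database_path, debug=False):
    # read the whole JSON cache into a dict
    with open(database_path, encoding='utf-8') as f:
        db = json.load(f)
    if debug:
        print('loaded db from {}'.format(database_path))
    return db

def update_db(db, database_path, debug=False):
    # overwrite the JSON cache with the updated dict
    with open(database_path, 'w', encoding='utf-8') as f:
        json.dump(db, f, ensure_ascii=False, indent=2)
    if debug:
        print('updated db at {}'.format(database_path))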
Example #4
def all():
    """
        the full database. use to get updated db before updating container
        ---
        responses:
          200:
            description: returns database
            schema:
              id: databaseGet
              properties:
                results:
                  type: json
                  default: {trends: {include_hashtags: {}, exclude_hashtags: {}, hashtags: {}}}
                status:
                  type: number
                  default: 200
    """
    args = request.args.get('q')
    if not args:
        args = "main"

    if args == "main":
        full_db = load_db(database_path=DATABASE_PATH)

        db_init_timestamp = str_2_datetime(
            full_db['trends']['include_hashtags']['initial_timestamp'],
            input_format=time_format_full_with_timezone)
        db_update_timestamp = str_2_datetime(
            full_db['trends']['include_hashtags']['timestamp'],
            input_format=time_format_full_with_timezone)

        print("time since app start: {:.2f} minutes".format(
            (time.time() - start_time) / 60))
        print("time since database init: {:.2f} hours".format(
            (datetime.datetime.now(tz=pytz.utc) - db_init_timestamp).seconds /
            3600))
        print("time since last update: {:.2f} minutes".format(
            (datetime.datetime.now(tz=pytz.utc) - db_update_timestamp).seconds
            / 60))

    elif args == "trends":
        full_db = load_db(database_path=TRENDS_DATABASE_PATH)
    elif args == "top_posts":
        full_db = load_db(database_path=TOP_RETWEETS_DATABASE_PATH)

    return jsonify(full_db)
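A usage sketch for this view, assuming it is registered on a Flask app object named app (the '/all' route and endpoint name are assumptions; the source shows neither):

from flask import Flask

app = Flask(__name__)
# hypothetical registration of the all() view above
app.add_url_rule('/all', endpoint='all_db', view_func=all)

with app.test_client() as client:
    # ?q=trends selects TRENDS_DATABASE_PATH; a missing q falls back to "main"
    response = client.get('/all?q=trends')
    print(response.status_code, response.get_json())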
Example #5
def get_top_trends_from_twitter(country='Japan', exclude_hashtags=False, debug=False, cache_duration_mins=15, append_db=True):
    cache_db = load_db(database_path=db_path, debug=False)
    trends_db = cache_db['trends']
    if exclude_hashtags:
        trends_cache = trends_db['exclude_hashtags']
    else:
        trends_cache = trends_db['include_hashtags']

    # compare db and now
    try:
        db_timestamp = str_2_datetime(trends_cache['timestamp'], input_format=time_format_full_with_timezone)
    except ValueError:
        db_timestamp = str_2_datetime(trends_cache['timestamp'], input_format=time_format_full_no_timezone)
        db_timestamp = db_timestamp.astimezone(tz=pytz.utc)

    rq_timestamp = datetime.datetime.now(tz=pytz.utc)

    time_diff = rq_timestamp - db_timestamp
    print('time since last trends API call: {} (h:m:s)'.format(time_diff))
    print('time diff in seconds: {}'.format(time_diff.total_seconds()))
    print('time in db: {}'.format(db_timestamp))
    print('time in rq: {}'.format(rq_timestamp))
    if time_diff.total_seconds() < cache_duration_mins*60:
        print('less than cache duration, returning cache')
        output_json = json.dumps(trends_cache['content'], ensure_ascii=False)
        return output_json
    else:
        output_json = get_top_trends_from_twitter_api(country=country, exclude_hashtags=exclude_hashtags)
        # cache expired: parse the fresh API response and merge it into the database
        output_list = json.loads(output_json)

        if append_db:
            output_list = trends_cache['content'] + output_list
            
        if exclude_hashtags:
            cache_db['trends']['exclude_hashtags']['content'] = output_list
            cache_db['trends']['exclude_hashtags']['timestamp'] = datetime_2_str(rq_timestamp, output_format=time_format_full_with_timezone)
        else:
            cache_db['trends']['include_hashtags']['content'] = output_list
            cache_db['trends']['include_hashtags']['timestamp'] = datetime_2_str(rq_timestamp, output_format=time_format_full_with_timezone)

        update_db(cache_db, database_path=db_path, debug=debug)
        return output_json
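A usage sketch for the cached wrapper, assuming db_path and an authenticated Twitter client are set up as module globals: the first call refreshes from the API, and any call within cache_duration_mins serves the stored copy.

trends_json = get_top_trends_from_twitter(country='Japan', exclude_hashtags=False,
                                          cache_duration_mins=15)
for t in json.loads(trends_json)[:3]:
    print(t)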
Example #6
def get_top_trends_from_twitter_api(country='Japan', exclude_hashtags=True):
    """
    What is it useful for? Participation. From the Twitter API docs:

    How can I participate in a trend?
    Simply post a Tweet including the exact word or phrase as it appears in the trends list
    (with the hashtag, if you see one). Due to the large number of people Tweeting about these
    specific trends, you may not always be able to find your particular Tweet in search, but
    your followers will always see your Tweets.

    twitter Ads API has a keyword insights endpoint
    ref: https://developer.twitter.com/en/docs/ads/audiences/api-reference/keyword-insights.html#
    :param country:
    :param exclude_hashtags:
    :return:
    """
    # Yahoo has stopped supporting WOEID lookups itself, so the yweather
    # library is used to resolve the country name to a WOEID
    woeid_client = yweather.Client()
    woeid = woeid_client.fetch_woeid(location=country)

    if exclude_hashtags:
        trends = api.GetTrendsWoeid(woeid, exclude='hashtags')
    else:
        trends = api.GetTrendsWoeid(woeid, exclude=None)

    output = []
    for trend in trends:
        trend = trend.AsDict()

        # get volumes (tweet_volume may be missing or None)
        try:
            tw_volume = [int(trend['tweet_volume'])]
        except (KeyError, TypeError, ValueError):
            tw_volume = [0]

        # match time with timezone
        timestamp_str = trend['timestamp']  # this is utc
        timestamp_dt = str_2_datetime(timestamp_str, input_format=time_format_twitter_trends).replace(tzinfo=pytz.utc)

        # timestamp_local = timestamp_dt.astimezone(tz=pytz.utc)
        timestamp_utc_str = datetime_2_str(timestamp_dt, output_format=time_format_full_with_timezone)

        output.append({
            "label": trend['name'],
            "volume": tw_volume,
            "time": timestamp_utc_str,
            "query": trend['query'],
            "url": trend['url']
        })

    output_json = json.dumps(output, ensure_ascii=False)
    return output_json
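For reference, a hedged call-site sketch: api is assumed to be an authenticated python-twitter twitter.Api instance, so this hits the live Twitter API.

trends_json = get_top_trends_from_twitter_api(country='Japan', exclude_hashtags=True)
for t in json.loads(trends_json):
    # each entry carries label, volume, time, query and url keys
    print('{}: volume={}'.format(t['label'], t['volume']))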
Example #7
def trends():
    """
        loads trends database
        ---
        responses:
          200:
            description: returns database
            schema:
              id: databaseGet
              properties:
                results:
                  type: json
                  default: {content: {}, timestamp: "", initial_timestamp: ""}
                status:
                  type: string
                  default: ok
    """
    full_db = load_db(database_path=DATABASE_PATH)

    db_init_timestamp = str_2_datetime(
        full_db['trends']['include_hashtags']['initial_timestamp'],
        input_format=time_format_full_with_timezone)

    db_update_timestamp = str_2_datetime(
        full_db['trends']['include_hashtags']['timestamp'],
        input_format=time_format_full_with_timezone)

    print("time since app start: {:.2f} minutes".format(
        (time.time() - start_time) / 60))
    print("time since database init: {}".format(
        (datetime.datetime.now(tz=pytz.utc) - db_init_timestamp)))
    print("time since last update: {:.2f} minutes".format(
        (datetime.datetime.now(tz=pytz.utc) - db_update_timestamp).seconds /
        60))
    print('\ndebug:')
    print('time now: {}'.format(datetime.datetime.now(tz=pytz.utc)))
    print('db init time: {}'.format(db_init_timestamp))
    print('diff: {}'.format(
        datetime.datetime.now(tz=pytz.utc) - db_init_timestamp))

    # send back only a portion of the db
    results = full_db['trends']['include_hashtags']
    del full_db

    contents = results['content']

    output_content = []
    for c in contents:
        output_content.append({
            "label": c['label'],
            "time": c['time'],
            "volume": c['volume']
        })

    output_results = {
        "content": output_content,
        "timestamp": results['timestamp'],
        "initial_timestamp": results['initial_timestamp']
    }

    trends_output = {"results": output_results, "status": 'ok'}

    return jsonify(trends_output)
Example #8
def get_top_trends_from_twitter(country='Japan', exclude_hashtags=False, debug=False, cache_duration_mins=15, append_db=True):
    """
    also updates the daily trends db, but does not return it
    for trends, the timestamp stored is the time of the call

    :param country:
    :param exclude_hashtags:
    :param debug:
    :param cache_duration_mins:
    :param append_db:
    :return:
    """
    # load main db
    cache_db = load_db(database_path=db_path, debug=False)
    trends_db = cache_db['trends']

    if exclude_hashtags:
        trends_cache = trends_db['exclude_hashtags']
    else:
        trends_cache = trends_db['include_hashtags']

    # load trends + top retweets db
    trend_search_db = load_db(database_path=trends_db_path, debug=False)

    # the cache-freshness check below uses the main db timestamp only
    try:
        db_timestamp = str_2_datetime(trends_cache['timestamp'], input_format=time_format_full_with_timezone)
    except ValueError:
        db_timestamp = str_2_datetime(trends_cache['timestamp'], input_format=time_format_full_no_timezone)
        db_timestamp = db_timestamp.astimezone(tz=pytz.utc)

    rq_timestamp = datetime.datetime.now(tz=pytz.utc)

    time_diff = rq_timestamp - db_timestamp
    print('time since last trends API call: {} (h:m:s)'.format(time_diff))
    print('time diff in seconds: {}'.format(time_diff.total_seconds()))
    print('time in db: {}'.format(db_timestamp))
    print('time in rq: {}'.format(rq_timestamp))

    if time_diff.total_seconds() < cache_duration_mins*60:
        print('less than cache duration, returning cache')
        output_json = json.dumps(trends_cache['content'], ensure_ascii=False)
        return output_json
    else:
        output_json, img_output_json = get_top_trends_from_twitter_api(country=country, exclude_hashtags=exclude_hashtags)
        # cache expired: parse the fresh API response (trends plus image search results)
        output_list = json.loads(output_json)
        trend_search_list = json.loads(img_output_json)

        if append_db:
            output_list = trends_cache['content'] + output_list
            trend_search_list = trend_search_db['trends'] + trend_search_list
            
        if exclude_hashtags:
            cache_db['trends']['exclude_hashtags']['content'] = output_list
            cache_db['trends']['exclude_hashtags']['timestamp'] = datetime_2_str(rq_timestamp, output_format=time_format_full_with_timezone)
        else:
            cache_db['trends']['include_hashtags']['content'] = output_list
            cache_db['trends']['include_hashtags']['timestamp'] = datetime_2_str(rq_timestamp, output_format=time_format_full_with_timezone)

        trend_search_db['trends'] = trend_search_list

        update_db(cache_db, database_path=db_path, debug=debug)
        update_db(trend_search_db, database_path=trends_db_path, debug=debug)

        print('trends and image database updated.')

        # keep output_json alive so the refresh path returns the fresh trends,
        # mirroring the cached path above
        del cache_db
        del trends_db
        del trends_cache
        del trend_search_db
        del trend_search_list
        del output_list
        del img_output_json

        print('memory freed.')
        return output_json
Example #9
def process_tweets(tweets_response, keep_all=False, debug=False):
    """
    by default (keep_all=False), processing discards tweets with no retweets or likes;
    keep_all=True keeps all tweets, whether they have retweets or not

    :param tweets_response:
    :param keep_all:
    :param debug:
    :return:
    """
    tweets = tweets_response

    #print(json.dumps(tweets, indent=4, ensure_ascii=False))

    output_tweets = []
    for tweet in tweets:
        # loop through every tweet
        output_tweet = {}
        output_tweet['likes'] = 0
        for k, v in tweet.items():
            if k == "favorite_count" or k == "retweeted_status":
                # print('checking favorite_count at {}'.format(k))
                # print(v)
                if k == "favorite_count" and v:
                    output_tweet['likes'] = v
                elif k == "retweeted_status" and v:
                    # print("rt:", v)
                    try:
                        output_tweet['likes'] = v['favorite_count']
                    except (KeyError, TypeError):
                        # retweeted_status without a favorite_count field
                        print('favorites not found')
                        print(v)

            elif k == "media" and v:
                # turn media dict into img url
                output_tweet[k] = []
                for m in v:
                    output_tweet[k].append(m['media_url_https'])

            elif k == "id" and v:
                # make url from id and dispose id
                output_tweet['url'] = "https://twitter.com/anyuser/status/" + str(v)

            elif k == "retweet_count":
                if v:
                    if debug: print('       picking this: ', k, v)
                    output_tweet[k] = v
                else:
                    if debug: print('       skipping this: ', k, v)
                    # record 0 here; zero-retweet tweets are dropped below unless keep_all
                    output_tweet[k] = 0

            elif k == "created_at":
                tweet_creation_time = str_2_datetime(v, input_format=time_format_twitter_created_at)
                tweet_checked_time = datetime.datetime.now(tz=pytz.utc)

                output_tweet['timestamp'] = {
                    "created": datetime_2_str(tweet_creation_time, output_format=time_format_full_with_timezone),
                    "last_checked": datetime_2_str(tweet_checked_time, output_format=time_format_full_with_timezone)
                }

            else:
                # keep k:v same
                if debug: print('keeping this: ', k, repr(v))
                output_tweet[k] = v

        print('num of likes: ', output_tweet['likes'])

        output_tweets.append(output_tweet)

    output = []
    if not keep_all:
        for o in output_tweets:
            if o['likes'] > 0 and o['retweet_count'] > 0:
                output.append(o)
    else:
        output = output_tweets

    return output
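A self-contained check of the filtering behaviour, assuming time_format_twitter_created_at is Twitter's created_at format ('%a %b %d %H:%M:%S %z %Y'); the tweets below are fabricated for illustration:

sample_tweets = [
    {"favorite_count": 12, "retweet_count": 3, "id": 1,
     "created_at": "Mon Jan 01 00:00:00 +0000 2024"},
    {"favorite_count": 0, "retweet_count": 0, "id": 2,
     "created_at": "Mon Jan 01 00:00:00 +0000 2024"},
]

kept = process_tweets(sample_tweets, keep_all=False)
print(len(kept))  # 1: the zero-engagement tweet is dropped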