Exemplo n.º 1
0
def main():
    """Run the bot-finding behaviour against today's database, then disconnect."""
    env = defaults.get_environment()
    database = DB(env, today())

    FindBotsBehaviour(env, database)

    database.disconnect()
Exemplo n.º 2
0
def main():
    """Create a copy of the standard database, active from the given date."""
    environment = defaults.get_environment()

    arg_parser = argparse.ArgumentParser(
        description='Copy the standard database.')
    arg_parser.add_argument(
        'date', help='the date from which the new database is active')
    cli_args = arg_parser.parse_args()

    NewDB(environment, cli_args.date)
Exemplo n.º 3
0
def main():
    """Process operator commands tweeted at the bot's accounts.

    Supported commands:
      * ``+screen_name CATEGORY [t N] [dl N]`` -- set a tweeter's category,
        optionally with a retweet threshold (``t``) and daily limit (``dl``).
      * ``add #tag`` -- run the words downloader for a hashtag and reply
        with its score summary.
    Anything else (except replies starting 'To' and retweets 'RT') is
    logged as an unknown command.
    """
    environment = defaults.get_environment()
    db = DB(environment, today())
    db_summary = DBSummary(environment)

    api = TwitterAPI(environment, db_summary)

    commands = db.get_commands(screen_names=db_summary.get_account_screen_names())
    # Commands already acted upon (since the baseline id) are skipped below.
    processed_commands = db_summary.get_processed_commands(since_id=db.get_baseline_tweet_id())

    # Raw string so '\+' is a regex escape, not a (deprecated) string escape;
    # compiled once since it is reused for every command.
    category_pattern = re.compile(
        r'\+([a-zA-Z0-9_]+) ([A-Z][AB]?)( t ([0-9]+))?( dl ([0-9]+))?')

    for command in commands:
        if command.id in processed_commands:
            logger.info(f'Skipping {command.id}. Already processed: {command.text}')
        else:
            m = category_pattern.match(command.text)
            if m:
                screen_name = m.group(1)
                category = m.group(2)
                rt_threshold = m.group(4)      # None when ' t N' absent
                rt_daily_limit = m.group(6)    # None when ' dl N' absent

                db.set_tweeter_category(screen_name=screen_name,
                                        category=category,
                                        rt_threshold=rt_threshold,
                                        rt_daily_limit=rt_daily_limit)

                status_text = f'+{screen_name} set to {category}'
                if rt_threshold is not None:
                    status_text += f' rt threshold {rt_threshold}'
                if rt_daily_limit is not None:
                    status_text += f' dl {rt_daily_limit}'
                save_command(command, status_text, db_summary, api.polling_api())
            elif command.text.lower()[:5] == 'add #':
                tag_name = command.text[5:]
                logger.info(f'Adding {tag_name}')
                # Pass the argument as a list (shell=False) so tweet-supplied
                # text cannot inject shell metacharacters (was shell=True with
                # string concatenation).
                call(['python3.7', 'words.py', tag_name])
                tag = db.get_tag_ranges(tag=f'#{tag_name}', min_override=db.get_baseline_tweet_id())
                print(tag.name_scores)
                # Second-to-last entry -- presumably the latest *complete*
                # scoring window; TODO confirm against get_tag_ranges.
                name_score = tag.name_scores[-2] if len(tag.name_scores) > 1 else None
                # max(..., 1) guards against division by a zero status count.
                score_text = '{} / {} = {:.1f}'.format(name_score.total_score,
                                                       name_score.status_count,
                                                       name_score.total_score / max(name_score.status_count, 1)
                                                       ) if name_score is not None else ''
                status_text = f'-{tag_name} added. {score_text} {tag.state}'
                save_command(command, status_text, db_summary, api.polling_api())
            else:
                # Replies ('To...') and retweets ('RT...') are expected noise.
                if command.text[:2] not in ('To', 'RT'):
                    logger.info(f'Unknown command {command.id}: {command.text}')

    db_summary.disconnect()
    db.disconnect()
Exemplo n.º 4
0
def main():
    """Draft trend tweets from today's database."""
    env = defaults.get_environment()
    database = DB(env, today())

    DraftTrends(env, database)
Exemplo n.º 5
0
def main():
    """Synchronise Twitter list membership, then disconnect from the DB."""
    env = defaults.get_environment()
    database = DB(env, today())
    summary = DBSummary(env)

    twitter_list = TwitterList(env, database, summary)
    twitter_list.add_to_lists()
    twitter_list.remove_from_lists()

    database.disconnect()
Exemplo n.º 6
0
def main():
    """Download Twitter data: trends, lists, the home timeline, or hashtags."""
    env = defaults.get_environment()
    arg_parser = argparse.ArgumentParser(description='Download Twitter data.')
    arg_parser.add_argument('command',
                            help='trends, lists or comma separated hashtags')
    args = arg_parser.parse_args()

    if args.command in ('trends', 'lists', 'home_timeline'):
        command, tags_list = args.command, None
        logger.info('Trendlist')
    else:
        # Anything else is treated as a comma-separated hashtag list.
        command = 'trends'
        tags_list = [{'tag': f'#{t}'} for t in args.command.split(',')]

    Words(env, command, tags_list)
Exemplo n.º 7
0
def main():
    """Find bots based on activity on the date given on the command line."""
    arg_parser = argparse.ArgumentParser(
        description='Find bots based on activity on the given day.')
    arg_parser.add_argument('date',
                            metavar='yyyy-mm-dd',
                            help='the date to process')
    args = arg_parser.parse_args()

    environment = defaults.get_environment()
    database = DB(environment, args.date)

    FindBots(database, args.date)

    database.disconnect()
Exemplo n.º 8
0
def main():
    """Record the canonical hashtag spelling for each word from the last week."""
    env = defaults.get_environment()
    database = DB(env, today())

    week_ago = str(datetime.date.today() - timedelta(days=7))
    yesterday_date = str(datetime.date.today() - timedelta(days=1))
    logger.info(f'Dates: {week_ago} {yesterday_date}')

    # Map lower-cased word -> hashtag as seen; later rows win on collisions,
    # matching the original loop's overwrite behaviour.
    words = {row[0].lower(): row[0]
             for row in database.get_top_hashtags(week_ago, yesterday_date)}

    logger.info(f'{len(words)} words')

    for word, hashtag in words.items():
        database.set_word_hashtag(word, hashtag)
        logger.debug(f'{word:>30} {hashtag}')

    database.disconnect()
Exemplo n.º 9
0
def main():
    """Nightly tweeter promotion/demotion pass.

    Scores each tweeter from engagement stats, gives recent top trenders a
    small boost, then time-demotes stale category-C and category-D tweeters.
    """
    env = defaults.get_environment()
    db = DB(env, today())
    # db = DB(env, '2018-12-25')
    promotion = Promotion()

    # Demotion cut-offs: C holders demote after 30 days, D after 90.
    demotedate_c = (datetime.date.today() -
                    timedelta(days=30)).strftime('%Y-%m-%d')
    demotedate_d = (datetime.date.today() -
                    timedelta(days=90)).strftime('%Y-%m-%d')

    # Promote to C
    logger.info("Tweeter Promotion  %s" % today())

    rows = db.get_tweeter_promotion_stats()

    # If a person has more than POWER_TWEEP followers, then mark it as F if it is negative - cannot do it for all
    # tweeps because
    # would get too many category F's. Don't want to waste resources storing Tweeps we may never encounter in future.
    logger.info(f'Starting loop of {len(rows)} records.')
    row_count = len(rows)
    current_row = 0
    for screen_name, pos, neg, blocked, category, relevance_score, followers_count, name, location, time_zone in rows:
        tweeter = promotion.add(screen_name=screen_name,
                                name=name,
                                category=category,
                                relevance_score=relevance_score,
                                location=location,
                                time_zone=time_zone,
                                followers_count=followers_count)
        # if relevance_score is None:
        #     relevance_score = 0
        # adjustment = 0
        # Heavily blocked and strongly negative relevance: straight to 'B'.
        if blocked > 3 and blocked > pos and relevance_score <= -10:
            tweeter.new_category = 'B'
        elif neg > pos and (category is not None or relevance_score != 0
                            or followers_count >= POWER_TWEEP):
            # Net-negative engagement: penalise, harder when neg exceeds 3.
            if neg > 3:
                tweeter.adjust_score(-2)
            else:
                tweeter.adjust_score(-1)
        else:  # pos >= neg
            # Net-positive (or neutral) engagement earns a boost.
            if pos > 3:
                tweeter.adjust_score(2)
            elif pos > 1:
                tweeter.adjust_score(1)
        current_row += 1
        # Progress heartbeat every 100 rows.
        if current_row % 100 == 0:
            logger.info(
                f'{current_row:4d}/{row_count} {category} {screen_name}')

    # Promote top tweeps
    logger.info('Promoting top tweeps.')
    # NOTE(review): '<=' here is a *string* comparison, so it matches any
    # category ordering at or before 'C' (e.g. 'A', 'B', 'C') -- confirm
    # that is the intended "famous" set.
    db.c.execute('select screen_name from dim_tweeter where category <= ?',
                 ('C', ))
    rows = db.c.fetchall()
    famous = [row[0] for row in rows]

    # Top 50 recent trenders not already famous get a one-point boost.
    trenders = rank_words(
        f'{env.bot_data_directory}/trenders_published_%s.txt', 7)
    non_famous = [trender for trender in trenders if trender not in famous]
    for screen_name in non_famous[:50]:
        tweeter = promotion.add(screen_name=screen_name)
        tweeter.adjust_score(1)

    logger.info('Saving changes.')
    promotion.save_all()

    # Demote from D
    db.set_tweeter_category_by_date(date_category_was_set=demotedate_d,
                                    current_category='D',
                                    new_category='E')

    # Demote from C
    db.set_tweeter_category_by_date(date_category_was_set=demotedate_c,
                                    current_category='C',
                                    new_category='D')

    db.disconnect()
Exemplo n.º 10
0
def main():
    """Publish pending tweets: reject, retweet, or post with a rendered image.

    Walks the summary DB's pending queue; each tweet ends with its status
    and posted_at timestamp written back via ``tweet.save_status()``.
    """
    env = defaults.get_environment()
    db_summary = DBSummary(env)

    tweets = db_summary.get_pending_tweets()

    token = db_summary.get_default_token()
    twitter_api = twitter.Api(consumer_key=env.consumer_key,
                              consumer_secret=env.consumer_secret,
                              access_token_key=token.key,
                              access_token_secret=token.secret)

    pub = Publish(env, db_summary, twitter_api)
    tcount = 0
    for tweet in tweets:
        tcount += 1
        if tweet.status == 'pend-rej':
            # Operator rejected this tweet; just record the decision.
            tweet.status = 'rejected'
            tweet.posted_at = datetime.datetime.now().strftime(
                '%Y-%m-%d %H:%M:%S')
        elif tweet.type == 'retweet':
            if tcount > 1:
                time.sleep(env.tweet_delay)  # throttle successive API calls
            result = dict()
            if env.post == 'post':
                result = pub.retweet(tweet)
            else:
                logger.info('Tweet %s not posted due to env setting.',
                            tweet.tweet_id)
            if 'status' in result and result['status'] != 'OK':
                # API failure: keep the error message as the status.
                tweet.status = result['message']
            else:
                if tweet.status == 'pend-unpost':
                    tweet.status = 'unposted'
                else:
                    tweet.status = 'posted'
            tweet.posted_at = datetime.datetime.now().strftime(
                '%Y-%m-%d %H:%M:%S')

        else:
            if tcount > 1:
                time.sleep(env.tweet_delay)  # throttle successive API calls

            action = tweet.type
            # Shrink the tweet body (sizes 10 down to 2) until it fits in
            # 145 characters.
            i = 10
            tweet_text = pub.write_tweet(tweet, i)
            while len(tweet_text) > 145 and i > 1:
                i -= 1
                tweet_text = pub.write_tweet(tweet, i)

            if not env.production:
                # Defuse hashtags/mentions so test posts do not ping anyone.
                tweet_text = tweet_text.replace('#', '-').replace('@', '+')

            img_file = pub.draw_tweet(tweet, pub.scoreWeight[action])

            logger.info('%3d %s', len(tweet_text), tweet_text)

            if env.post == 'post':
                # Context manager closes the image handle (previously the
                # file object was opened and never closed).
                with open(img_file, 'rb') as photo:
                    result = twitter_api.PostUpdate(media=photo,
                                                    status=tweet_text)
                print(result)
                logger.info('Posted!')

            # NOTE(review): status becomes 'posted' even when env.post is not
            # 'post' -- presumably intentional for dev runs; confirm.
            tweet.status = 'posted'
            tweet.posted_at = datetime.datetime.now().strftime(
                '%Y-%m-%d %H:%M:%S')

        tweet.save_status()

    db_summary.disconnect()
Exemplo n.º 11
0
def main():
    """Apply operator approvals fetched from the cloud.

    Pulls pending decisions via ``Publisher.get_pending`` and applies each
    section of the payload: tweet approvals/rejections (with trender files
    appended on disk), trend add/delete decisions, tweeter category changes,
    and word generic-category changes. Tweets that were already processed
    are pushed back to the cloud marked 'posted'/'rejected'.
    """
    environment = defaults.get_environment()
    db = DB(environment, today())
    db_summary = DBSummary(environment)

    jdata = Publisher.get_pending(environment)

    trenders_published = list()
    trenders_all = list()
    already_processed = list()
    if 'tweets' in jdata:
        for tweet in jdata['tweets']:
            tweet_status = db_summary.get_tweet_status(tweet['t_id'])
            if tweet_status is None:
                # First time we see this tweet: persist it and its items.
                db_summary.save_tweet(tweet)
                for item in tweet['items']:
                    db_summary.save_tweet_item(tweet, item)
                    if tweet['type'] == 'trenders' and item['selected'] == 'Y':
                        # [1:] strips the leading sigil from tweet_text --
                        # presumably '@'; TODO confirm against the drafter.
                        trenders_all.append(item['tweet_text'][1:])
                        if tweet['status'] == 'pend-post':
                            trenders_published.append(item['tweet_text'][1:])
            elif tweet_status in ['posted', 'rejected']:
                # Already handled in a previous run; report back as such.
                tweet['status'] = tweet_status
                already_processed.append(tweet)

        if len(trenders_published) > 0:
            with open(
                    f'{environment.bot_data_directory}/trenders_all_{yesterday_file()}.txt',
                    'a') as f:
                for sn in trenders_all:
                    f.write("%s\n" % sn)
            with open(
                    f'{environment.bot_data_directory}/trenders_published_{yesterday_file()}.txt',
                    'a') as f:
                for sn in trenders_published:
                    f.write("%s\n" % sn)

    db_summary.disconnect()

    trend_date = now()
    if 'trends' in jdata:
        if len(jdata['trends']) > 0:
            relevant_words = db.get_relevant_words()

            generic_words = db.get_generic_words()

            trs = list()
            for trend in jdata['trends']:
                tag = '#' + trend['hashtag'].lower()
                tr = {
                    'hashtag': trend['hashtag'],
                    'status': 'posted',
                    'trend_at': trend_date
                }
                trs.append(tr)
                tag_discovery_result = db.get_tag_discovery_result(tag)
                status = nvl(tag_discovery_result, 'NONE')

                # Manual add/delete only when it does not contradict the
                # current discovery state.
                if trend['status'] == 'pend-post' and status in ('NONE',
                                                                 'AUTO_DEL',
                                                                 'MAN_DEL'):
                    logger.info('Adding: ' + tag)
                    db.save_tag_discovery(tag, 'MAN_ADD')
                elif trend['status'] == 'pend-del' and status in ('AUTO_ADD',
                                                                  'MAN_ADD'):
                    logger.info('Deleting: ' + tag)
                    db.save_tag_discovery(tag, 'MAN_DEL')

                # Trend relevance: 'neutral' is stored as None.
                if 'relevance' in trend:
                    relevance = relevant_words[
                        trend['hashtag'].lower()] if trend['hashtag'].lower(
                        ) in relevant_words else 'neutral'
                    if trend['relevance'] != relevance:
                        new_relevance = None if trend[
                            'relevance'] == 'neutral' else trend['relevance']
                        db.set_word_relevance(trend['hashtag'], new_relevance)

                # Trend generic: same pattern as relevance above.
                if 'generic' in trend:
                    generic = generic_words[trend['hashtag'].lower(
                    )] if trend['hashtag'].lower() in generic_words else ''
                    if trend['generic'] != generic:
                        new_relevance = None if trend[
                            'generic'] == 'neutral' else trend['generic']
                        db.set_word_generic(trend['hashtag'], new_relevance)

            data = {'trends': trs}
            Publisher.publish(environment, data, 'trends')

            db.commit()
            # Touch a marker file that triggers the daily computation job.
            open(f'{environment.temp_file_directory}/compute_daily',
                 'a').close()

    if 'categories' in jdata:
        if len(jdata['categories']) > 0:
            for cat in jdata['categories']:
                db.set_tweeter_category(cat['screen_name'], cat['category'])
                # Fixed: was a print-style call (comma-separated message
                # parts) that broke logging's %-formatting, losing the
                # message.
                logger.info('Category for %s changed to %s',
                            cat['screen_name'], cat['category'])

            db.commit()

    if 'words' in jdata:
        if len(jdata['words']) > 0:
            for word in jdata['words']:
                category = word['category']
                if category == '':
                    category = None
                db.set_word_generic(word['word'], category)
                # Fixed: same print-style logging defect as above.
                logger.info('Generic for %s changed to %s',
                            word['word'], category)

    db.disconnect()

    if len(already_processed) > 0:
        data = {'tweets': already_processed}
        Publisher.publish(environment, data, 'posted')
Exemplo n.º 12
0
def main():
    """Draft daily stat tweets and push them, plus app metrics, for approval."""
    parser = argparse.ArgumentParser(description='Draft stats for the given day and push to cloud for approval.')
    parser.add_argument('date', metavar='yyyy-mm-dd',
                        help='the date to process')

    args = parser.parse_args()

    environment = defaults.get_environment()
    db = DB(environment, args.date)
    db_summary = DBSummary(environment)

    date_skey = db.get_date_skey(args.date)

    # Work queue of draft actions to produce tweets for.
    actions = list()
    action = {'type': 'trends'}
    actions.append(action)
    action = {'type': 'mentions'}
    actions.append(action)

    action_ind = 0
    tweets = list()
    stat_tweet_count = 0
    # Index-based loop -- presumably Stats can append 'trenders' entries to
    # `actions` while we iterate; TODO confirm in Stats.
    while action_ind < len(actions):
        action = actions[action_ind]['type']
        # tweeters = None
        stats = Stats(args.date, action, db, actions, environment,
                      actions[action_ind]['trend'] if action == 'trenders' else None)

        i = 100
        is_tweetable = True

        if action == "trenders":
            tweet = stats.write_tweet(i)
            # Enforce the daily cap on trender stat tweets.
            if not stats.is_trenders_tweet_postable(tweet) or stat_tweet_count >= DAILY_STAT_TWEET_LIMIT:
                is_tweetable = False
        elif action == "trends":
            tweet = stats.write_tweet(i)
        elif action == "mentions":
            tweet = stats.write_tweet(i)

        # NOTE(review): `tweet` can be None here yet is still saved when
        # is_tweetable -- confirm save_tweet tolerates None.
        if is_tweetable:
            db_summary.save_tweet(tweet)
            stat_tweet_count += 1

        if tweet is not None:
            tweets.append(tweet)
        # Publish drafts in pairs, pausing between pushes.
        if len(tweets) >= 2:
            data = {'tweets': tweets, 'date': args.date}
            Publisher.publish(environment, data, 'draft')
            tweets = list()
            time.sleep(10)
        action_ind += 1

    db_summary.disconnect()

    # Now get app metrics
    rows = db.get_tweeter_category_counts()

    # Tweeter counts per category; unknown categories pool into 'other'.
    metric_dict = {'date': args.date, 'other': 0}
    for cat, count in rows:
        if cat is None:
            cat = ' '
        if cat in ('A', 'B', 'C', 'D', 'E', 'F', 'R', ' '):
            metric_dict[cat] = count
        else:
            metric_dict['other'] += count

    # Get count of total tweets and tweets by category
    rows = db.get_tweeter_category_tweet_counts(date_skey)

    metric_dict['tweets_total'] = 0
    metric_dict['tweets_other'] = 0
    for cat, count in rows:
        metric_dict['tweets_total'] += count

        if cat is None:
            cat = ' '
        if cat in ('A', 'B', 'C', 'D', 'E', 'F', 'R', ' '):
            metric_dict['tweets_' + cat] = count
        else:
            metric_dict['tweets_other'] += count

    # Add file sizes
    metric_dict['fact_db_size'] = os.path.getsize(environment.database)
    metric_dict['dim_db_size'] = os.path.getsize(environment.dimension_database)
    metric_dict['summ_db_size'] = os.path.getsize(environment.summary_database)

    followers_count = db.get_tweeter_followers_count('pakpolstats')
    metric_dict['account_followers'] = followers_count

    # Final push: any leftover drafts plus the metrics payload.
    data = {'tweets': tweets, 'metrics': metric_dict, 'date': args.date}
    Publisher.publish(environment, data, 'draft')
Exemplo n.º 13
0
def main():
    """Build a fresh dimension database for today."""
    env = defaults.get_environment()
    NewDimDB(env, today())