def test_get_rankings_year_after_sofifa():
    """Regression test: standings generated for a season after SOFIFA data
    begins (2008-09) must match the stored reference CSV."""
    year = 2008
    work_dir = os.path.join(os.getcwd(), 'temp')
    season_csv = '{0}-{1}.csv'.format(year, year + 1)
    source_path = os.path.join(RAW_CLEANED_DATA_FILE_PATH, season_csv)
    output_path = os.path.join(work_dir, season_csv)
    make_directory(work_dir)

    # Build rankings up to the season's final day; prediction rows excluded.
    cutoff_date = '{0}-12-31'.format(year + 1)
    get_rankings(source_path, output_path, cutoff_date, include_prediction=False)

    reference_path = os.path.join(STANDINGS_PATH, season_csv)
    assert compare_csv(reference_path, output_path)
    remove_directory(work_dir)
Exemplo n.º 2
0
def get_html():
    """Render the complete 'Has FSU Lost Yet?' page as one HTML string.

    Gathers the page's dynamic pieces from sibling helpers and splices them
    into an inline-styled single-page template that also embeds a Google
    Analytics snippet. All helpers are assumed to return strings -- TODO
    confirm (non-string returns would raise TypeError on concatenation).
    """
    season_record = wins()            # presumably the current season W-L record; verify against helper
    streak_record = streak()          # presumably the active winning-streak count
    lost = determine_if_lost()        # headline text (e.g. Yes/No) -- assumption, confirm
    ranking = get_rankings()          # College Football Playoff ranking text
    days_since_lost = countdown_script()  # HTML/JS fragment appended after the records
    # NOTE(review): template is a single literal; keep byte-for-byte when editing.
    return "<!DOCTYPE html><html><head><style>*{font-family: Arial, sans-serif;text-align: center;position: relative;}.top_title{font-size:50pt;top: 5%;}.big_text{font-size: 300pt;top: 25%;}.record{font-size: 50pt;top: 40%;}</style><title>Has FSU Lost Yet?</title></head><body><div class=\"top_title\">Has FSU Lost Yet?</div><div class=\"big_text\"><b>" + lost + "</b></div><div class=\"record\" id =\"winningStreak\">Winning Streak: " + streak_record + "</div><div class=\"record\" id=\"thisSeason\">This Season: " + season_record + "</div><div class=\"record\">College Football Playoff Ranking: " + ranking + "</div>" + days_since_lost + "<script>(function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){(i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)})(window,document,'script','//www.google-analytics.com/analytics.js','ga');ga('create', 'UA-56656391-1', 'auto');ga('require', 'displayfeatures');ga('send', 'pageview');</script></body></html>"
Exemplo n.º 3
0
def get_html():
    """Render the complete 'Has FSU Lost Yet?' page as one HTML string.

    Gathers the page's dynamic pieces from sibling helpers and splices them
    into an inline-styled single-page template that also embeds a Google
    Analytics snippet. All helpers are assumed to return strings -- TODO
    confirm (non-string returns would raise TypeError on concatenation).
    """
    season_record = wins()            # presumably the current season W-L record; verify against helper
    streak_record = streak()          # presumably the active winning-streak count
    lost = determine_if_lost()        # headline text (e.g. Yes/No) -- assumption, confirm
    ranking = get_rankings()          # College Football Playoff ranking text
    days_since_lost = countdown_script()  # HTML/JS fragment appended after the records
    # NOTE(review): template is a single literal; keep byte-for-byte when editing.
    return "<!DOCTYPE html><html><head><style>*{font-family: Arial, sans-serif;text-align: center;position: relative;}.top_title{font-size:50pt;top: 5%;}.big_text{font-size: 300pt;top: 25%;}.record{font-size: 50pt;top: 40%;}</style><title>Has FSU Lost Yet?</title></head><body><div class=\"top_title\">Has FSU Lost Yet?</div><div class=\"big_text\"><b>" + lost + "</b></div><div class=\"record\" id =\"winningStreak\">Winning Streak: " + streak_record + "</div><div class=\"record\" id=\"thisSeason\">This Season: " + season_record + "</div><div class=\"record\">College Football Playoff Ranking: " + ranking + "</div>" + days_since_lost + "<script>(function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){(i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)})(window,document,'script','//www.google-analytics.com/analytics.js','ga');ga('create', 'UA-56656391-1', 'auto');ga('require', 'displayfeatures');ga('send', 'pageview');</script></body></html>"
Exemplo n.º 4
0
 def get_rankings(self, include_scores=False):
     """Return a list of players (or a list of tuples (player ID, score) if include_scores==True) in ranked order, where "score" indicates a player's internal score (higher is better)."""
     # NOTE(review): this is NOT recursion -- inside the method body the bare
     # name `get_rankings` resolves in the enclosing module's globals at call
     # time, so this delegates to a module-level get_rankings(obj, flag)
     # helper. TODO confirm that helper exists; otherwise this raises
     # NameError (or recurses if the method was imported at module scope).
     return get_rankings(self, include_scores)
Exemplo n.º 5
0
def run():
  """Contains the main logic for this analysis.

  For each category in _CATEGORIES:
    1. Build ground-truth URL rankings from the seed tweets (optionally
       excluding retweets and tweets within a delta of the source).
    2. For each time delta in _DELTAS: compute per-group rankings
       (newsaholics/active/common users, precision/F-score/CI experts, etc.),
       write demographic and ranking files under ../graph/ and ../data/,
       compute precision/recall for each group and several mixed models,
       and draw the suite of precision-recall graphs.

  All inputs/outputs are files and module-level globals; returns None.
  """
  FileLog.set_log_dir()

  seeds = Util.load_seeds()
  for category in _CATEGORIES:
    log('Preforming analysis for category: %s' % category)
    # Category-specific runs use a fixed 10% top-news cutoff instead of the
    # global default.
    size_top_news = _SIZE_TOP_NEWS
    if category:
      size_top_news = .10

    # _SWITCHED flips which data set is treated as the evaluation set.
    data_set = DataSet.TESTING
    retweets = set()
    if _SWITCHED:
      data_set = DataSet.TRAINING
    if _EXCLUDE_RETWEETS:
      retweets = ground_truths.find_retweets(_TESTING_SET_MONTHS)
    log('Num retweets to exclude: %s' % len(retweets))
    gt_rankings = ground_truths.get_gt_rankings(seeds, data_set, category,
                                                exclude_tweets_within_delta=_EXCLUDE_TWEETS_WITHIN_DELTA,
                                                retweets=retweets)
    log('Num ground_truth_rankings: %s' % len(gt_rankings))

    # Format for use later.
    # Map url -> 0-based ground-truth rank for constant-time lookups below.
    ground_truth_url_to_rank = {}
    for rank, (url, count) in enumerate(gt_rankings):
      ground_truth_url_to_rank[url] = rank

    target_news = ground_truths.find_target_news(gt_rankings, size_top_news)
    log('Size target_news: %s' % len(target_news))

    for delta in _DELTAS:
      # Encodes delta / top-news % / expert % / category into file names.
      run_params_str = 'd%s_t%s_e%s_%s' % (delta, int(size_top_news * 100),
                                           int(_SIZE_EXPERTS * 100), category)
      info_output_dir = '../graph/FolkWisdom/%s/info/' % run_params_str
      Util.ensure_dir_exist(info_output_dir)


      groups, d_num_followers  = user_groups.get_all_user_groups(delta, category)
      log('Num experts (precision): %s' % len(groups.precision))
      log('Num experts (fscore): %s' % len(groups.fscore))
      log('Num experts (ci): %s' % len(groups.ci))
      log('Num Super Experts: %s' %len(groups.super_experts))
      log('Num Social Bias Experts: %s' % len(groups.social_bias))

      log('Finding rankings with an %s hour delta.' % delta)
      ranks = rankings.get_rankings(delta, seeds, groups, category, d_num_followers)

      # Output some interesting info to file
      size_market_unfiltered = '0'
      with open('../data/FolkWisdom/size_of_market_unfiltered.txt') as in_file:
        size_market_unfiltered = in_file.readline().strip()

      # Group-size demographics for this (delta, category) run.
      with open('%suser_demographics_%s.txt'
                % (info_output_dir, run_params_str), 'w') as output_file:
        output_file.write('Number of Newsaholics: %s\n' % len(groups.newsaholics))
        output_file.write('Number of Active Users: %s\n' % len(groups.active_users))
        output_file.write('Number of Common Users: %s\n' % len(groups.common_users))
        output_file.write('\n');
        output_file.write('Number of Precision Experts: %s\n' % len(groups.precision))
        output_file.write('Number of F-Score Experts: %s\n' % len(groups.fscore))
        output_file.write('Number of CI Experts: %s\n' % len(groups.ci))
        output_file.write('Number of Social Bias Experts: %s\n' % len(groups.social_bias))
        output_file.write('Total number of unique experts: %s\n' % len(groups.all_experts))
        output_file.write('Number of Precision and F-Score Experts: %s\n'
                          % len(groups.precision.intersection(groups.fscore)))
        output_file.write('Number of Precision and CI Experts: %s\n'
                          % len(groups.precision.intersection(groups.ci)))
        output_file.write('Number of F-Score and CI Experts: %s\n'
                          % len(groups.fscore.intersection(groups.ci)))
        output_file.write('Number of Super Experts: %s\n' % len(groups.super_experts))
        output_file.write('\n');
        output_file.write('Number of Users (Total): %s\n'
                          % (len(groups.newsaholics) + len(groups.active_users)
                             + len(groups.common_users)))
        output_file.write('Size of market (unfiltered): %s\n'
                          % size_market_unfiltered)
        output_file.write('\n')
        # output_file.write('Number of votes by Newsaholics: %s\n'
        #                   % num_votes_newsaholics)
        # output_file.write('Number of votes by Market: %s\n' % num_votes_market)
        # output_file.write('Number of votes by Active Users: %s\n'
        #                   % num_votes_active)
        # output_file.write('Number of votes by Common Users: %s\n'
        #                   % num_votes_common)
        # output_file.write('\n');
        # output_file.write('Number of votes by Expert (Precision) Users: %s\n'
        #         % num_votes_expert_precision) 
        # output_file.write('Number of votes by Expert (fscore) Users: %s\n'
        #         % num_votes_expert_fscore) 
        # output_file.write('Number of votes by Expert (ci) Users: %s\n'
        #         % num_votes_expert_ci) 
        # output_file.write('Number of votes by Super Experts: %s\n'
        #                   % num_votes_expert_s)
        # output_file.write('Number of votes by Social Bias Experts: %s\n'
        #                   % num_votes_expert_sb)
        # output_file.write('\n')
        # output_file.write('Total Number of votes cast: %s\n'
        #                   % (num_votes_newsaholics + num_votes_active
        #                      + num_votes_common))
        # output_file.write('\n')
        output_file.write('Total Number of Good News: %s\n' % len(target_news))

      # Log the top of each ranking for quick eyeballing.
      log('Ground Truth Top 50')
      for i in range(min(len(gt_rankings), 50)):
        url, count = gt_rankings[i]
        log('[%s] %s\t%s' %(i, url.strip(), count))
      log('-----------------------------------')
      log('Newsaholic Top 5')
      for i in range(min(len(ranks.newsaholics), 5)):
        url, count = ranks.newsaholics[i]
        log('[%s] %s\t%s' %(i, url.strip(), count))
      log('-----------------------------------')
      log('Active Top 5')
      for i in range(min(len(ranks.active_users), 5)):
        url, count = ranks.active_users[i]
        log('[%s] %s\t%s' %(i, url.strip(), count))
      log('-----------------------------------')
      log('Common Top 5')
      for i in range(min(len(ranks.common_users), 5)):
        url, count = ranks.common_users[i]
        log('[%s] %s\t%s' %(i, url.strip(), count))
      log('-----------------------------------')
      log('nonexpert Top 5')
      for i in range(min(len(ranks.non_experts), 5)):
        url, count = ranks.non_experts[i]
        log('[%s] %s\t%s' %(i, url.strip(), count))
      log('-----------------------------------')
      log('Expert (Precision) Top 5')
      for i in range(min(len(ranks.precision), 5)):
        url, count = ranks.precision[i]
        log('[%s] %s\t%s' %(i, url.strip(), count))
      log('-----------------------------------')
      log('Expert (fscore) Top 5')
      for i in range(min(len(ranks.fscore), 5)):
        url, count = ranks.fscore[i]
        log('[%s] %s\t%s' %(i, url.strip(), count))
      log('-----------------------------------')
      log('Expert (ci) Top 5')
      for i in range(min(len(ranks.ci), 5)):
        url, count = ranks.ci[i]
        log('[%s] %s\t%s' %(i, url.strip(), count))
      log('-----------------------------------')
      log('Super Expert Top 5')
      for i in range(min(len(ranks.super_experts), 5)):
        url, count = ranks.super_experts[i]
        log('[%s] %s\t%s' %(i, url.strip(), count))
      log('-----------------------------------')
      log('Social Bias Expert Top 5')
      for i in range(min(len(ranks.social_bias), 5)):
        url, count = ranks.social_bias[i]
        log('[%s] %s\t%s' %(i, url.strip(), count))

        
      # Build rank -> url maps per group (0-based ranks).
      market_rank_to_url = {}
      newsaholic_rank_to_url = {}
      active_rank_to_url = {}
      common_rank_to_url = {}
      expert_p_rank_to_url = {}
      expert_f_rank_to_url = {}
      expert_c_rank_to_url = {}
      expert_s_rank_to_url = {}
      for rank, (url, count) in enumerate(ranks.newsaholics):
        newsaholic_rank_to_url[rank] = url
      for rank, (url, count) in enumerate(ranks.population):
        market_rank_to_url[rank] = url
      for rank, (url, count) in enumerate(ranks.active_users):
        active_rank_to_url[rank] = url
      for rank, (url, count) in enumerate(ranks.common_users):
        common_rank_to_url[rank] = url
      for rank, (url, count) in enumerate(ranks.precision):
        expert_p_rank_to_url[rank] = url
      for rank, (url, count) in enumerate(ranks.fscore):
        expert_f_rank_to_url[rank] = url
      for rank, (url, count) in enumerate(ranks.ci):
        expert_c_rank_to_url[rank] = url
      for rank, (url, count) in enumerate(ranks.super_experts):
        expert_s_rank_to_url[rank] = url

      # Inverse maps (url -> 0-based rank) used by the mixed models and the
      # ranking-comparison TSV below. NOTE(review): "market" here is
      # ranks.non_experts, while "population" is the full ranks.population.
      population_url_to_rank = {}
      market_url_to_rank = {}
      precision_url_to_rank = {}
      fscore_url_to_rank = {}
      ci_url_to_rank = {}
      ci_1_url_to_rank = {}
      ci_2_url_to_rank = {}
      ci_3_url_to_rank = {}
      common_url_to_rank = {}
      for rank, (url, count) in enumerate(ranks.population):
        population_url_to_rank[url] = rank
      for rank, (url, count) in enumerate(ranks.non_experts):
        market_url_to_rank[url] = rank
      for rank, (url, count) in enumerate(ranks.precision):
        precision_url_to_rank[url] = rank
      for rank, (url, count) in enumerate(ranks.fscore):
        fscore_url_to_rank[url] = rank
      for rank, (url, count) in enumerate(ranks.ci):
        ci_url_to_rank[url] = rank
      for rank, (url, count) in enumerate(ranks.ci_1):
        ci_1_url_to_rank[url] = rank
      for rank, (url, count) in enumerate(ranks.ci_2):
        ci_2_url_to_rank[url] = rank
      for rank, (url, count) in enumerate(ranks.ci_3):
        ci_3_url_to_rank[url] = rank
      for rank, (url, count) in enumerate(ranks.common_users):
        common_url_to_rank[url] = rank

      precisions, recalls = precision_recall.get_precision_recalls(gt_rankings, ranks)

      # Mixed model: crowd (non-experts) combined with the three expert groups.
      mixed_rankings = mixed_model.get_mixed_rankings(market_url_to_rank,
                                                      precisions.non_experts,
                                                      precision_url_to_rank,
                                                      precisions.precision,
                                                      fscore_url_to_rank,
                                                      precisions.fscore,
                                                      ci_url_to_rank,
                                                      precisions.ci,
                                                      ground_truth_url_to_rank)

      # Mixed model: inactive crowd (common users) combined with experts.
      mixed_inact_rankings = mixed_model.get_mixed_rankings(common_url_to_rank,
                                                            precisions.common_users,
                                                            precision_url_to_rank,
                                                            precisions.precision,
                                                            fscore_url_to_rank,
                                                            precisions.fscore,
                                                            ci_url_to_rank,
                                                            precisions.ci,
                                                            ground_truth_url_to_rank)

      # Mixed model: crowd combined with the three CI sub-groups.
      mixed_ci_rankings = mixed_model.get_mixed_rankings(market_url_to_rank,
                                                         precisions.non_experts,
                                                         ci_1_url_to_rank,
                                                         precisions.ci_1,
                                                         ci_2_url_to_rank,
                                                         precisions.ci_2,
                                                         ci_3_url_to_rank,
                                                         precisions.ci_3,
                                                         ground_truth_url_to_rank)
                                                         

      mixed_precisions, mixed_recalls = precision_recall.calc_precision_recall(gt_rankings, 
                                                                               mixed_rankings)

      mixed_inact_precisions, mixed_inact_recalls = precision_recall.calc_precision_recall(gt_rankings, 
                                                                                           mixed_inact_rankings)

      mixed_ci_precisions, mixed_ci_recalls = precision_recall.calc_precision_recall(gt_rankings, 
                                                                                     mixed_ci_rankings)

      log('-----------------------------------')
      log('Mixed (min) Top 5')
      for i in range(min(len(mixed_rankings), 5)):
        url, count = mixed_rankings[i]
        log('[%s] %s\t%s' %(i + 1, url, count))
      log('-----------------------------------')

      # Side-by-side 1-based ranks per ground-truth URL (0 == not ranked).
      with open('%sranking_comparisons_%s.tsv'
                % (info_output_dir, run_params_str), 'w') as out_file:
        for gt_rank, (gt_url, _) in enumerate(gt_rankings):
          market_rank = 0
          precision_rank = 0
          ci_rank = 0
          fscore_rank = 0
          inactive_crowd_rank = 0
          if gt_url in market_url_to_rank:
            market_rank = market_url_to_rank[gt_url] + 1
          if gt_url in precision_url_to_rank:
            precision_rank = precision_url_to_rank[gt_url] + 1
          if gt_url in ci_url_to_rank:
            ci_rank = ci_url_to_rank[gt_url] + 1
          if gt_url in fscore_url_to_rank:
            fscore_rank = fscore_url_to_rank[gt_url] + 1
          if gt_url in common_url_to_rank:
            inactive_crowd_rank = common_url_to_rank[gt_url] + 1
          line = '%s\t%s\t%s\t%s\t%s\t%s\t%s\n' % (gt_url, gt_rank + 1,
                                                   market_rank,
                                                   inactive_crowd_rank,
                                                   precision_rank, ci_rank,
                                                   fscore_rank)
          out_file.write(line)


      # Per-group ranking dumps: url, count, (group rank, ground-truth rank).
      with open('%sground_truth_rankings_%s.tsv'
                % (info_output_dir, run_params_str), 'w') as output_file:
        for url, count in gt_rankings:
          output_file.write('%s\t%s\n' % (url.strip(), count))
      # NOTE(review): "market" file is written from ranks.common_users here,
      # same source as the common_user file below -- confirm intended.
      with open('%smarket_rankings_%s.tsv'
                % (info_output_dir, run_params_str), 'w') as output_file:
        for rank, (url, count) in enumerate(ranks.common_users):
          output_file.write('%s\t%s\t(%s,%s)\n'
                            % (url.strip(), count, rank,
                               ground_truth_url_to_rank[url]))
      with open('%snewsaholic_rankings_%s.tsv'
                % (info_output_dir, run_params_str), 'w') as output_file:
        for rank, (url, count) in enumerate(ranks.newsaholics):
          output_file.write('%s\t%s\t(%s,%s)\n'
                            % (url.strip(), count, rank,
                               ground_truth_url_to_rank[url]))
      with open('%sactive_user_rankings_%s.tsv'
                % (info_output_dir, run_params_str), 'w') as output_file:
        for rank, (url, count) in enumerate(ranks.active_users):
          output_file.write('%s\t%s\t(%s,%s)\n'
                            % (url.strip(), count, rank,
                               ground_truth_url_to_rank[url]))
      with open('%scommon_user_rankings_%s.tsv'
                % (info_output_dir, run_params_str), 'w') as output_file:
        for rank, (url, count) in enumerate(ranks.common_users):
          output_file.write('%s\t%s\t(%s,%s)\n'
                            % (url.strip(), count, rank,
                               ground_truth_url_to_rank[url]))
      with open('%snonexpert_user_rankings_%s.tsv'
                % (info_output_dir, run_params_str), 'w') as output_file:
        for rank, (url, count) in enumerate(ranks.non_experts):
          output_file.write('%s\t%s\t(%s,%s)\n'
                            % (url.strip(), count, rank,
                               ground_truth_url_to_rank[url]))
      with open('%sexpert_p_user_rankings_%s.tsv'
                % (info_output_dir, run_params_str), 'w') as output_file:
        for rank, (url, count) in enumerate(ranks.precision):
          output_file.write('%s\t%s\t(%s,%s)\n'
                            % (url.strip(), count, rank,
                               ground_truth_url_to_rank[url]))
      with open('%sexpert_f_user_rankings_%s.tsv'
                % (info_output_dir, run_params_str), 'w') as output_file:
        for rank, (url, count) in enumerate(ranks.fscore):
          output_file.write('%s\t%s\t(%s,%s)\n'
                            % (url.strip(), count, rank,
                               ground_truth_url_to_rank[url]))
      with open('%sexpert_c_user_rankings_%s.tsv'
                % (info_output_dir, run_params_str), 'w') as output_file:
        for rank, (url, count) in enumerate(ranks.ci):
          output_file.write('%s\t%s\t(%s,%s)\n'
                            % (url.strip(), count, rank,
                               ground_truth_url_to_rank[url]))
      with open('%sexpert_s_user_rankings_%s.tsv'
                % (info_output_dir, run_params_str), 'w') as output_file:
        for rank, (url, count) in enumerate(ranks.super_experts):
          output_file.write('%s\t%s\t(%s,%s)\n'
                            % (url.strip(), count, rank,
                            ground_truth_url_to_rank[url]))
      with open('%smixed_rankings_%s.tsv'
                % (info_output_dir, run_params_str), 'w') as output_file:
        for rank, (url, count) in enumerate(mixed_rankings):
          output_file.write('%s\t%s\t(%s,%s)\n'
                            % (url.strip(), count, rank,
                            ground_truth_url_to_rank[url]))

      # One precision value per line, per group, for downstream plotting.
      with open('../data/FolkWisdom/market_precisions_%s.txt'
                % run_params_str, 'w') as out_file:
        for precision in precisions.common_users:
          out_file.write('%s\n' % precision)

      with open('../data/FolkWisdom/nonexpert_precisions_%s.txt'
                % run_params_str, 'w') as out_file:
        for precision in precisions.non_experts:
          out_file.write('%s\n' % precision)

      with open('../data/FolkWisdom/expert_p_precisions_%s.txt'
                % run_params_str, 'w') as out_file:
        for precision in precisions.precision:
          out_file.write('%s\n' % precision)

      with open('../data/FolkWisdom/expert_f_precisions_%s.txt'
                % run_params_str, 'w') as out_file:
        for precision in precisions.fscore:
          out_file.write('%s\n' % precision)

      with open('../data/FolkWisdom/expert_c_precisions_%s.txt'
                % run_params_str, 'w') as out_file:
        for precision in precisions.ci:
          out_file.write('%s\n' % precision)

      log('Drawing summary precision-recall graphs...')
      # draw_precision_recall_graph(market_precisions, market_recalls,
      precision_recall.draw([precisions.newsaholics, precisions.active_users,
                             precisions.common_users, precisions.precision,
                             precisions.fscore, precisions.ci,
                             precisions.super_experts],
                            [recalls.newsaholics, recalls.active_users,
                             recalls.common_users, recalls.precision,
                             recalls.fscore, recalls.ci, recalls.super_experts],
                            ['Newsaholics', 'Active', 'Common', 'Precision',
                             'F-score', 'CI', 'Super Experts'],
                            'precision_recall_all',
                            run_params_str)

      # Draw via old method because it has fancy markings.
      experts.draw_precision_recall_experts(precisions.non_experts, recalls.non_experts,
                                            precisions.precision, recalls.precision,
                                            precisions.fscore, recalls.fscore,
                                            precisions.ci, recalls.ci,
                                            run_params_str)

      log('Drawing experts precision-recall graph...')
      # precision_recall.draw_with_markers([precisions.population, precisions.non_experts, precisions.precision,
      #                                     precisions.fscore, precisions.ci],
      #                                    [recalls.population, recalls.non_experts, recalls.precision,
      #                                     recalls.fscore, recalls.ci],
      #                                    ['Population', 'Crowd', 'Precision', 'F-score', 'CI'],
      #                                    'precision_recall_experts',
      #                                    0, run_params_str)

      log('Drawing mixed + inact graph...')
      precision_recall.draw_with_markers([precisions.non_experts, precisions.common_users, mixed_inact_precisions],
                                         [recalls.non_experts, recalls.common_users, mixed_inact_recalls],
                                         ['Crowd', 'Inactive Crowd', 'Mixed + Inactive'],
                                         'precision_recall_mixed_and_inactive',
                                         3, run_params_str, zoom=True)

      log('Drawing ci breakdown by followers precisions-recall graph...')
      precision_recall.draw([precisions.non_experts, precisions.ci,
                             precisions.ci_hi, precisions.ci_li],
                            [recalls.non_experts, recalls.ci,
                             recalls.ci_hi, recalls.ci_li],
                            ['Crowd', 'CI', 'CI High', 'CI Low'],
                            'precision_recall_ci_followers_breakdown',
                            run_params_str)

      log('Drawing social bias precision-recall graph...')
      precision_recall.draw([precisions.non_experts, precisions.social_bias,
                             precisions.precision, precisions.fscore,
                             precisions.ci],
                            [recalls.non_experts, recalls.social_bias,
                             recalls.precision, recalls.fscore,
                             recalls.ci],
                            ['Crowd', 'Influence Experts', 'Precision',
                             'F-score', 'CI'],
                            'precision_recall_social_bias',
                            run_params_str)

      log('Drawing basic groups precision-recall graph...')
      precision_recall.draw([precisions.newsaholics, precisions.active_users,
                             precisions.common_users],
                            [recalls.newsaholics, recalls.active_users,
                             recalls.common_users],
                            ['Newsaholics', 'Active Users', 'Common Users'],
                            'precision_recall_basic_groups',
                            run_params_str)

      log('Drawing crowd def precision-recall graph...')
      precision_recall.draw([precisions.non_experts, precisions.common_users],
                            [recalls.non_experts, recalls.common_users],
                            ['Crowd', 'Inactive Crowd'],
                            'precision_recall_crowd_def',
                            run_params_str, zoom=True)

      log('Drawing non_expert_sampling precision-recall graph...')
      precision_recall.draw_with_markers([precisions.non_experts, precisions.non_experts_sampled,
                                          precisions.non_experts_10, precisions.non_experts_25,
                                          precisions.non_experts_1, precisions.ci],
                                          [recalls.non_experts, recalls.non_experts_sampled,
                                           recalls.non_experts_10, recalls.non_experts_25,
                                           recalls.non_experts_1, recalls.ci],
                                          ['Crowd', 'Crowd (33% sample)', 'Crowd (10% sample)',
                                           'Crowd (5% sample)', 'Crowd (2% sample)', 'Experts (CI)'],
                                          'precision_recall_non_expert_sampling',
                                          3, run_params_str, ncol=2)

      # TODO: Replace with new method.
      log('Drawing mixed model precision-recall graph...')
      mixed_model.draw_precision_recall_mixed(precisions.non_experts, recalls.non_experts,
                                              mixed_precisions, mixed_recalls,
                                              run_params_str, zoom=True)

      log('Drawing mixed ci model precision-recall graph...')
      precision_recall.draw([precisions.non_experts, mixed_ci_precisions],
                            [recalls.non_experts, mixed_ci_recalls],
                            ['Crowd', 'Mixed'],
                            'precision_recall_mixed_ci',
                            run_params_str)

      log('Drawing weighted followers precision-recall graph...')
      precision_recall.draw([precisions.non_experts, precisions.weighted_followers, precisions.ci],
                            [recalls.non_experts, recalls.weighted_followers, recalls.ci],
                            ['Crowd', 'Weighted Followers', 'CI'],
                            'precision_recall_weighted_followers',
                            run_params_str)

      log('Drawing ci weighted graph...')
      precision_recall.draw([precisions.population, precisions.ci, precisions.ci_weighted],
                            [recalls.population, recalls.ci, recalls.ci_weighted],
                            ['Crowd', 'CI', 'CI (Weighted)'],
                            'precision_recall_ci_weighted',
                            run_params_str)

      log('Drawing weighted graph...')
      precision_recall.draw([precisions.population, precisions.weighted],
                            [recalls.population, recalls.weighted],
                            ['Crowd', 'Crowd (Weighted)'],
                            'precision_recall_weighted',
                            run_params_str)

      log('Drawing weighted both graph...')
      precision_recall.draw([precisions.population, precisions.weighted, precisions.weighted_both],
                            [recalls.population, recalls.weighted, recalls.weighted_both],
                            ['Crowd', 'Crowd (Weighted)', 'Crowd (Weighted Both)'],
                            'precision_recall_weighted_both',
                            run_params_str)
def magic(should_train=True,
          should_scrape=False,
          data_year_available_from=1993,
          data_year_collect_from=2006):
    """End-to-end pipeline: refresh data, preprocess, train a classifier,
    predict the remaining fixtures round by round, and persist results
    plus predicted standings to the database.

    Args:
        should_train: when True, (re)train classifiers and save a summary;
            when False, load a previously saved classifier from CLF_FILE.
        should_scrape: when True, re-scrape SOFIFA team overall ratings
            (slow; SOFIFA updates stats only a few times a month).
        data_year_available_from: first season with raw match data.
        data_year_collect_from: first season with SOFIFA OVA data; the
            model is trained on matches from this season onward.
    """
    # --- Optional, infrequent data collection --------------------------
    # Team overall (OVA) ratings scraped from SOFIFA. Takes a long time.
    if should_scrape:
        scrape_team_ova_all(OVA_FILE_PATH, data_year_collect_from,
                            CURRENT_YEAR)

    # --- Preprocessing --------------------------------------------------
    # Latest premier league match results
    # (also available at http://www.football-data.co.uk/englandm.php).
    get_current_fixtures(RAW_DATA_FILE_PATH_CURRENT)

    # League standings for every season since data became available.
    get_rankings_all(data_year_available_from, CURRENT_YEAR,
                     RAW_CLEANED_DATA_FILE_PATH, STANDINGS_PATH)

    # --- Feature generation (order matters: each step feeds the next) ---
    # 1. Keep only the selected columns of the raw data.
    #    Produces cleaned CSVs in RAW_CLEANED_DATA_FILE_PATH.
    clean_all(RAW_DATA_FILE_PATH, RAW_CLEANED_DATA_FILE_PATH,
              data_year_available_from, CURRENT_YEAR)

    # 2. Attach Overall Rating (OVA) columns to the cleaned seasons that
    #    have SOFIFA data.
    merge_ova_to_cleaned_all(OVA_FILE_PATH, RAW_CLEANED_DATA_FILE_PATH,
                             data_year_collect_from, CURRENT_YEAR)

    # 3. Copy the cleaned raw data over for prediction use.
    copy_csv(RAW_CLEANED_DATA_FILE_PATH, CLEANED_DATA_FILE_PATH)

    # 4. Add running-state columns (current points, goals for/against/
    #    difference, matches played, streaks, last 5 games).
    add_current_details_all(CLEANED_DATA_FILE_PATH, CLEANED_DATA_FILE_PATH,
                            STANDINGS_PATH, data_year_available_from,
                            CURRENT_YEAR, data_year_available_from)

    # 5. Merge per-season files into a single training file. Only seasons
    #    with OVA data are used; the model also tends to perform better
    #    this way.
    combine_matches(CLEANED_DATA_FILE_PATH, FINAL_FILE, data_year_collect_from,
                    CURRENT_YEAR)

    # 6. Add head-to-head results between the two teams of each match.
    #    Edits the final CSV under DATA_PATH in place.
    get_match_results_against(FINAL_FILE, CLEANED_DATA_FILE_PATH, DATA_PATH,
                              data_year_available_from, CURRENT_YEAR)

    # --- Model -----------------------------------------------------------
    # 7. Train (or load) the best classifier. With recalculate=True this
    #    runs several classifiers (grid-searching where needed) and records
    #    each one's confidence score; otherwise it loads CLF_FILE if it
    #    exists. Returns the best classifier.
    best_clf, _, best_clf_average = get_clf(FINAL_FILE,
                                            CONFIDENCE_FILE,
                                            CLF_FILE,
                                            recalculate=should_train)

    # --- Round-by-round prediction ---------------------------------------
    # 8. Predict one upcoming round at a time, fold the predicted results
    #    back into the aggregated data, and repeat until no games remain.
    #    Prediction probabilities per match are written to stat_path.
    first_round = True

    # Save the current (pre-prediction) ranking first.
    remove_directory(STATISTICS_PATH)
    today = datetime.datetime.now().date().strftime('%Y-%m-%d')
    round_ranking_file = os.path.join(
        PRED_RANKING_ROUND_PATH, 'prediction_ranking_{}.csv'.format(today))
    get_rankings(RAW_CLEANED_DATA_FILE_PATH_CURRENT,
                 round_ranking_file,
                 include_prediction=True,
                 predicted_date_so_far=today,
                 ranking_summary_file=PRED_RANKING_ROUND_SUMMARY_FILE)

    while True:
        has_next_round, round_date = predict_next_round(
            best_clf,
            FINAL_FILE,
            RAW_CLEANED_DATA_FILE_PATH_CURRENT,
            statistics=True,
            stat_path=PREDICTION_FILE,
            first=first_round)
        if not has_next_round:
            break
        # Re-derive current details, re-aggregate, and re-attach
        # head-to-head results so the next round sees the new predictions.
        add_current_details(RAW_CLEANED_DATA_FILE_PATH_CURRENT,
                            CLEANED_DATA_FILE_PATH_CURRENT, STANDINGS_PATH,
                            data_year_available_from)
        combine_matches(CLEANED_DATA_FILE_PATH, FINAL_FILE,
                        data_year_collect_from, CURRENT_YEAR)
        get_match_results_against(FINAL_FILE, CLEANED_DATA_FILE_PATH,
                                  DATA_PATH, data_year_available_from,
                                  CURRENT_YEAR)
        round_ranking_file = os.path.join(
            PRED_RANKING_ROUND_PATH,
            'prediction_ranking_{}.csv'.format(round_date))
        get_rankings(PREDICTION_FILE,
                     round_ranking_file,
                     include_prediction=True,
                     predicted_date_so_far=round_date,
                     ranking_summary_file=PRED_RANKING_ROUND_SUMMARY_FILE)
        first_round = False

    # 9. Prediction done — build the predicted final season standings.
    winning_team = get_rankings(PREDICTION_FILE,
                                PRED_RANKING_FILE,
                                include_prediction=True)

    # 10. Persist past results, match predictions, and standing
    #     predictions to the database.
    save_new_data_to_database(DATABASE_PATH, FINAL_FILE, PREDICTION_FILE,
                              PRED_RANKING_ROUND_SUMMARY_FILE)

    # 11. Persist a training summary as well (only when we retrained).
    if should_train:
        save_summary_to_database(DATABASE_PATH, best_clf_average, winning_team)
Exemplo n.º 7
0
def run():
    """Contains the main logic for this analysis."""
    FileLog.set_log_dir()

    seeds = Util.load_seeds()
    for category in _CATEGORIES:
        log('Preforming analysis for category: %s' % category)
        size_top_news = _SIZE_TOP_NEWS
        if category:
            size_top_news = .10

        data_set = DataSet.TESTING
        retweets = set()
        if _SWITCHED:
            data_set = DataSet.TRAINING
        if _EXCLUDE_RETWEETS:
            retweets = ground_truths.find_retweets(_TESTING_SET_MONTHS)
        log('Num retweets to exclude: %s' % len(retweets))
        gt_rankings = ground_truths.get_gt_rankings(
            seeds,
            data_set,
            category,
            exclude_tweets_within_delta=_EXCLUDE_TWEETS_WITHIN_DELTA,
            retweets=retweets)
        log('Num ground_truth_rankings: %s' % len(gt_rankings))

        # Format for use later.
        ground_truth_url_to_rank = {}
        for rank, (url, count) in enumerate(gt_rankings):
            ground_truth_url_to_rank[url] = rank

        target_news = ground_truths.find_target_news(gt_rankings,
                                                     size_top_news)
        log('Size target_news: %s' % len(target_news))

        for delta in _DELTAS:
            run_params_str = 'd%s_t%s_e%s_%s' % (delta, int(
                size_top_news * 100), int(_SIZE_EXPERTS * 100), category)
            info_output_dir = '../graph/FolkWisdom/%s/info/' % run_params_str
            Util.ensure_dir_exist(info_output_dir)

            groups, d_num_followers = user_groups.get_all_user_groups(
                delta, category)
            log('Num experts (precision): %s' % len(groups.precision))
            log('Num experts (fscore): %s' % len(groups.fscore))
            log('Num experts (ci): %s' % len(groups.ci))
            log('Num Super Experts: %s' % len(groups.super_experts))
            log('Num Social Bias Experts: %s' % len(groups.social_bias))

            log('Finding rankings with an %s hour delta.' % delta)
            ranks = rankings.get_rankings(delta, seeds, groups, category,
                                          d_num_followers)

            # Output some interesting info to file
            size_market_unfiltered = '0'
            with open('../data/FolkWisdom/size_of_market_unfiltered.txt'
                      ) as in_file:
                size_market_unfiltered = in_file.readline().strip()

            with open(
                    '%suser_demographics_%s.txt' %
                (info_output_dir, run_params_str), 'w') as output_file:
                output_file.write('Number of Newsaholics: %s\n' %
                                  len(groups.newsaholics))
                output_file.write('Number of Active Users: %s\n' %
                                  len(groups.active_users))
                output_file.write('Number of Common Users: %s\n' %
                                  len(groups.common_users))
                output_file.write('\n')
                output_file.write('Number of Precision Experts: %s\n' %
                                  len(groups.precision))
                output_file.write('Number of F-Score Experts: %s\n' %
                                  len(groups.fscore))
                output_file.write('Number of CI Experts: %s\n' %
                                  len(groups.ci))
                output_file.write('Number of Social Bias Experts: %s\n' %
                                  len(groups.social_bias))
                output_file.write('Total number of unique experts: %s\n' %
                                  len(groups.all_experts))
                output_file.write(
                    'Number of Precision and F-Score Experts: %s\n' %
                    len(groups.precision.intersection(groups.fscore)))
                output_file.write(
                    'Number of Precision and CI Experts: %s\n' %
                    len(groups.precision.intersection(groups.ci)))
                output_file.write('Number of F-Score and CI Experts: %s\n' %
                                  len(groups.fscore.intersection(groups.ci)))
                output_file.write('Number of Super Experts: %s\n' %
                                  len(groups.super_experts))
                output_file.write('\n')
                output_file.write(
                    'Number of Users (Total): %s\n' %
                    (len(groups.newsaholics) + len(groups.active_users) +
                     len(groups.common_users)))
                output_file.write('Size of market (unfiltered): %s\n' %
                                  size_market_unfiltered)
                output_file.write('\n')
                # output_file.write('Number of votes by Newsaholics: %s\n'
                #                   % num_votes_newsaholics)
                # output_file.write('Number of votes by Market: %s\n' % num_votes_market)
                # output_file.write('Number of votes by Active Users: %s\n'
                #                   % num_votes_active)
                # output_file.write('Number of votes by Common Users: %s\n'
                #                   % num_votes_common)
                # output_file.write('\n');
                # output_file.write('Number of votes by Expert (Precision) Users: %s\n'
                #         % num_votes_expert_precision)
                # output_file.write('Number of votes by Expert (fscore) Users: %s\n'
                #         % num_votes_expert_fscore)
                # output_file.write('Number of votes by Expert (ci) Users: %s\n'
                #         % num_votes_expert_ci)
                # output_file.write('Number of votes by Super Experts: %s\n'
                #                   % num_votes_expert_s)
                # output_file.write('Number of votes by Social Bias Experts: %s\n'
                #                   % num_votes_expert_sb)
                # output_file.write('\n')
                # output_file.write('Total Number of votes cast: %s\n'
                #                   % (num_votes_newsaholics + num_votes_active
                #                      + num_votes_common))
                # output_file.write('\n')
                output_file.write('Total Number of Good News: %s\n' %
                                  len(target_news))

            log('Ground Truth Top 50')
            for i in range(min(len(gt_rankings), 50)):
                url, count = gt_rankings[i]
                log('[%s] %s\t%s' % (i, url.strip(), count))
            log('-----------------------------------')
            log('Newsaholic Top 5')
            for i in range(min(len(ranks.newsaholics), 5)):
                url, count = ranks.newsaholics[i]
                log('[%s] %s\t%s' % (i, url.strip(), count))
            log('-----------------------------------')
            log('Active Top 5')
            for i in range(min(len(ranks.active_users), 5)):
                url, count = ranks.active_users[i]
                log('[%s] %s\t%s' % (i, url.strip(), count))
            log('-----------------------------------')
            log('Common Top 5')
            for i in range(min(len(ranks.common_users), 5)):
                url, count = ranks.common_users[i]
                log('[%s] %s\t%s' % (i, url.strip(), count))
            log('-----------------------------------')
            log('nonexpert Top 5')
            for i in range(min(len(ranks.non_experts), 5)):
                url, count = ranks.non_experts[i]
                log('[%s] %s\t%s' % (i, url.strip(), count))
            log('-----------------------------------')
            log('Expert (Precision) Top 5')
            for i in range(min(len(ranks.precision), 5)):
                url, count = ranks.precision[i]
                log('[%s] %s\t%s' % (i, url.strip(), count))
            log('-----------------------------------')
            log('Expert (fscore) Top 5')
            for i in range(min(len(ranks.fscore), 5)):
                url, count = ranks.fscore[i]
                log('[%s] %s\t%s' % (i, url.strip(), count))
            log('-----------------------------------')
            log('Expert (ci) Top 5')
            for i in range(min(len(ranks.ci), 5)):
                url, count = ranks.ci[i]
                log('[%s] %s\t%s' % (i, url.strip(), count))
            log('-----------------------------------')
            log('Super Expert Top 5')
            for i in range(min(len(ranks.super_experts), 5)):
                url, count = ranks.super_experts[i]
                log('[%s] %s\t%s' % (i, url.strip(), count))
            log('-----------------------------------')
            log('Social Bias Expert Top 5')
            for i in range(min(len(ranks.social_bias), 5)):
                url, count = ranks.social_bias[i]
                log('[%s] %s\t%s' % (i, url.strip(), count))

            market_rank_to_url = {}
            newsaholic_rank_to_url = {}
            active_rank_to_url = {}
            common_rank_to_url = {}
            expert_p_rank_to_url = {}
            expert_f_rank_to_url = {}
            expert_c_rank_to_url = {}
            expert_s_rank_to_url = {}
            for rank, (url, count) in enumerate(ranks.newsaholics):
                newsaholic_rank_to_url[rank] = url
            for rank, (url, count) in enumerate(ranks.population):
                market_rank_to_url[rank] = url
            for rank, (url, count) in enumerate(ranks.active_users):
                active_rank_to_url[rank] = url
            for rank, (url, count) in enumerate(ranks.common_users):
                common_rank_to_url[rank] = url
            for rank, (url, count) in enumerate(ranks.precision):
                expert_p_rank_to_url[rank] = url
            for rank, (url, count) in enumerate(ranks.fscore):
                expert_f_rank_to_url[rank] = url
            for rank, (url, count) in enumerate(ranks.ci):
                expert_c_rank_to_url[rank] = url
            for rank, (url, count) in enumerate(ranks.super_experts):
                expert_s_rank_to_url[rank] = url

            population_url_to_rank = {}
            market_url_to_rank = {}
            precision_url_to_rank = {}
            fscore_url_to_rank = {}
            ci_url_to_rank = {}
            ci_1_url_to_rank = {}
            ci_2_url_to_rank = {}
            ci_3_url_to_rank = {}
            common_url_to_rank = {}
            for rank, (url, count) in enumerate(ranks.population):
                population_url_to_rank[url] = rank
            for rank, (url, count) in enumerate(ranks.non_experts):
                market_url_to_rank[url] = rank
            for rank, (url, count) in enumerate(ranks.precision):
                precision_url_to_rank[url] = rank
            for rank, (url, count) in enumerate(ranks.fscore):
                fscore_url_to_rank[url] = rank
            for rank, (url, count) in enumerate(ranks.ci):
                ci_url_to_rank[url] = rank
            for rank, (url, count) in enumerate(ranks.ci_1):
                ci_1_url_to_rank[url] = rank
            for rank, (url, count) in enumerate(ranks.ci_2):
                ci_2_url_to_rank[url] = rank
            for rank, (url, count) in enumerate(ranks.ci_3):
                ci_3_url_to_rank[url] = rank
            for rank, (url, count) in enumerate(ranks.common_users):
                common_url_to_rank[url] = rank

            precisions, recalls = precision_recall.get_precision_recalls(
                gt_rankings, ranks)

            mixed_rankings = mixed_model.get_mixed_rankings(
                market_url_to_rank, precisions.non_experts,
                precision_url_to_rank, precisions.precision,
                fscore_url_to_rank, precisions.fscore, ci_url_to_rank,
                precisions.ci, ground_truth_url_to_rank)

            mixed_inact_rankings = mixed_model.get_mixed_rankings(
                common_url_to_rank, precisions.common_users,
                precision_url_to_rank, precisions.precision,
                fscore_url_to_rank, precisions.fscore, ci_url_to_rank,
                precisions.ci, ground_truth_url_to_rank)

            mixed_ci_rankings = mixed_model.get_mixed_rankings(
                market_url_to_rank, precisions.non_experts, ci_1_url_to_rank,
                precisions.ci_1, ci_2_url_to_rank, precisions.ci_2,
                ci_3_url_to_rank, precisions.ci_3, ground_truth_url_to_rank)

            mixed_precisions, mixed_recalls = precision_recall.calc_precision_recall(
                gt_rankings, mixed_rankings)

            mixed_inact_precisions, mixed_inact_recalls = precision_recall.calc_precision_recall(
                gt_rankings, mixed_inact_rankings)

            mixed_ci_precisions, mixed_ci_recalls = precision_recall.calc_precision_recall(
                gt_rankings, mixed_ci_rankings)

            log('-----------------------------------')
            log('Mixed (min) Top 5')
            for i in range(min(len(mixed_rankings), 5)):
                url, count = mixed_rankings[i]
                log('[%s] %s\t%s' % (i + 1, url, count))
            log('-----------------------------------')

            with open(
                    '%sranking_comparisons_%s.tsv' %
                (info_output_dir, run_params_str), 'w') as out_file:
                for gt_rank, (gt_url, _) in enumerate(gt_rankings):
                    market_rank = 0
                    precision_rank = 0
                    ci_rank = 0
                    fscore_rank = 0
                    inactive_crowd_rank = 0
                    if gt_url in market_url_to_rank:
                        market_rank = market_url_to_rank[gt_url] + 1
                    if gt_url in precision_url_to_rank:
                        precision_rank = precision_url_to_rank[gt_url] + 1
                    if gt_url in ci_url_to_rank:
                        ci_rank = ci_url_to_rank[gt_url] + 1
                    if gt_url in fscore_url_to_rank:
                        fscore_rank = fscore_url_to_rank[gt_url] + 1
                    if gt_url in common_url_to_rank:
                        inactive_crowd_rank = common_url_to_rank[gt_url] + 1
                    line = '%s\t%s\t%s\t%s\t%s\t%s\t%s\n' % (
                        gt_url, gt_rank + 1, market_rank, inactive_crowd_rank,
                        precision_rank, ci_rank, fscore_rank)
                    out_file.write(line)

            with open(
                    '%sground_truth_rankings_%s.tsv' %
                (info_output_dir, run_params_str), 'w') as output_file:
                for url, count in gt_rankings:
                    output_file.write('%s\t%s\n' % (url.strip(), count))
            with open(
                    '%smarket_rankings_%s.tsv' %
                (info_output_dir, run_params_str), 'w') as output_file:
                for rank, (url, count) in enumerate(ranks.common_users):
                    output_file.write('%s\t%s\t(%s,%s)\n' %
                                      (url.strip(), count, rank,
                                       ground_truth_url_to_rank[url]))
            with open(
                    '%snewsaholic_rankings_%s.tsv' %
                (info_output_dir, run_params_str), 'w') as output_file:
                for rank, (url, count) in enumerate(ranks.newsaholics):
                    output_file.write('%s\t%s\t(%s,%s)\n' %
                                      (url.strip(), count, rank,
                                       ground_truth_url_to_rank[url]))
            with open(
                    '%sactive_user_rankings_%s.tsv' %
                (info_output_dir, run_params_str), 'w') as output_file:
                for rank, (url, count) in enumerate(ranks.active_users):
                    output_file.write('%s\t%s\t(%s,%s)\n' %
                                      (url.strip(), count, rank,
                                       ground_truth_url_to_rank[url]))
            with open(
                    '%scommon_user_rankings_%s.tsv' %
                (info_output_dir, run_params_str), 'w') as output_file:
                for rank, (url, count) in enumerate(ranks.common_users):
                    output_file.write('%s\t%s\t(%s,%s)\n' %
                                      (url.strip(), count, rank,
                                       ground_truth_url_to_rank[url]))
            with open(
                    '%snonexpert_user_rankings_%s.tsv' %
                (info_output_dir, run_params_str), 'w') as output_file:
                for rank, (url, count) in enumerate(ranks.non_experts):
                    output_file.write('%s\t%s\t(%s,%s)\n' %
                                      (url.strip(), count, rank,
                                       ground_truth_url_to_rank[url]))
            with open(
                    '%sexpert_p_user_rankings_%s.tsv' %
                (info_output_dir, run_params_str), 'w') as output_file:
                for rank, (url, count) in enumerate(ranks.precision):
                    output_file.write('%s\t%s\t(%s,%s)\n' %
                                      (url.strip(), count, rank,
                                       ground_truth_url_to_rank[url]))
            with open(
                    '%sexpert_f_user_rankings_%s.tsv' %
                (info_output_dir, run_params_str), 'w') as output_file:
                for rank, (url, count) in enumerate(ranks.fscore):
                    output_file.write('%s\t%s\t(%s,%s)\n' %
                                      (url.strip(), count, rank,
                                       ground_truth_url_to_rank[url]))
            with open(
                    '%sexpert_c_user_rankings_%s.tsv' %
                (info_output_dir, run_params_str), 'w') as output_file:
                for rank, (url, count) in enumerate(ranks.ci):
                    output_file.write('%s\t%s\t(%s,%s)\n' %
                                      (url.strip(), count, rank,
                                       ground_truth_url_to_rank[url]))
            with open(
                    '%sexpert_s_user_rankings_%s.tsv' %
                (info_output_dir, run_params_str), 'w') as output_file:
                for rank, (url, count) in enumerate(ranks.super_experts):
                    output_file.write('%s\t%s\t(%s,%s)\n' %
                                      (url.strip(), count, rank,
                                       ground_truth_url_to_rank[url]))
            with open(
                    '%smixed_rankings_%s.tsv' %
                (info_output_dir, run_params_str), 'w') as output_file:
                for rank, (url, count) in enumerate(mixed_rankings):
                    output_file.write('%s\t%s\t(%s,%s)\n' %
                                      (url.strip(), count, rank,
                                       ground_truth_url_to_rank[url]))

            with open(
                    '../data/FolkWisdom/market_precisions_%s.txt' %
                    run_params_str, 'w') as out_file:
                for precision in precisions.common_users:
                    out_file.write('%s\n' % precision)

            with open(
                    '../data/FolkWisdom/nonexpert_precisions_%s.txt' %
                    run_params_str, 'w') as out_file:
                for precision in precisions.non_experts:
                    out_file.write('%s\n' % precision)

            with open(
                    '../data/FolkWisdom/expert_p_precisions_%s.txt' %
                    run_params_str, 'w') as out_file:
                for precision in precisions.precision:
                    out_file.write('%s\n' % precision)

            with open(
                    '../data/FolkWisdom/expert_f_precisions_%s.txt' %
                    run_params_str, 'w') as out_file:
                for precision in precisions.fscore:
                    out_file.write('%s\n' % precision)

            with open(
                    '../data/FolkWisdom/expert_c_precisions_%s.txt' %
                    run_params_str, 'w') as out_file:
                for precision in precisions.ci:
                    out_file.write('%s\n' % precision)

            log('Drawing summary precision-recall graphs...')
            # draw_precision_recall_graph(market_precisions, market_recalls,
            precision_recall.draw([
                precisions.newsaholics, precisions.active_users,
                precisions.common_users, precisions.precision,
                precisions.fscore, precisions.ci, precisions.super_experts
            ], [
                recalls.newsaholics, recalls.active_users,
                recalls.common_users, recalls.precision, recalls.fscore,
                recalls.ci, recalls.super_experts
            ], [
                'Newsaholics', 'Active', 'Common', 'Precision', 'F-score',
                'CI', 'Super Experts'
            ], 'precision_recall_all', run_params_str)

            # Draw via old method because it has fancy markings.
            experts.draw_precision_recall_experts(
                precisions.non_experts, recalls.non_experts,
                precisions.precision, recalls.precision, precisions.fscore,
                recalls.fscore, precisions.ci, recalls.ci, run_params_str)

            log('Drawing experts precision-recall graph...')
            # precision_recall.draw_with_markers([precisions.population, precisions.non_experts, precisions.precision,
            #                                     precisions.fscore, precisions.ci],
            #                                    [recalls.population, recalls.non_experts, recalls.precision,
            #                                     recalls.fscore, recalls.ci],
            #                                    ['Population', 'Crowd', 'Precision', 'F-score', 'CI'],
            #                                    'precision_recall_experts',
            #                                    0, run_params_str)

            log('Drawing mixed + inact graph...')
            precision_recall.draw_with_markers(
                [
                    precisions.non_experts, precisions.common_users,
                    mixed_inact_precisions
                ], [
                    recalls.non_experts, recalls.common_users,
                    mixed_inact_recalls
                ], ['Crowd', 'Inactive Crowd', 'Mixed + Inactive'],
                'precision_recall_mixed_and_inactive',
                3,
                run_params_str,
                zoom=True)

            log('Drawing ci breakdown by followers precisions-recall graph...')
            precision_recall.draw([
                precisions.non_experts, precisions.ci, precisions.ci_hi,
                precisions.ci_li
            ], [recalls.non_experts, recalls.ci, recalls.ci_hi, recalls.ci_li],
                                  ['Crowd', 'CI', 'CI High', 'CI Low'],
                                  'precision_recall_ci_followers_breakdown',
                                  run_params_str)

            log('Drawing social bias precision-recall graph...')
            precision_recall.draw([
                precisions.non_experts, precisions.social_bias,
                precisions.precision, precisions.fscore, precisions.ci
            ], [
                recalls.non_experts, recalls.social_bias, recalls.precision,
                recalls.fscore, recalls.ci
            ], ['Crowd', 'Influence Experts', 'Precision', 'F-score', 'CI'],
                                  'precision_recall_social_bias',
                                  run_params_str)

            # NOTE(review): every call below renders one precision-recall
            # figure. In each call the list of precision series, the matching
            # list of recall series, and the legend labels are positionally
            # aligned; the string after the labels is the output figure name,
            # tagged with run_params_str.

            log('Drawing basic groups precision-recall graph...')
            # Compare the three basic activity groups against each other.
            precision_recall.draw([
                precisions.newsaholics, precisions.active_users,
                precisions.common_users
            ], [
                recalls.newsaholics, recalls.active_users, recalls.common_users
            ], ['Newsaholics', 'Active Users', 'Common Users'],
                                  'precision_recall_basic_groups',
                                  run_params_str)

            log('Drawing crowd def precision-recall graph...')
            # Full crowd vs. inactive crowd. zoom=True presumably adds a
            # zoomed-in view -- confirm against precision_recall.draw.
            precision_recall.draw(
                [precisions.non_experts, precisions.common_users],
                [recalls.non_experts, recalls.common_users],
                ['Crowd', 'Inactive Crowd'],
                'precision_recall_crowd_def',
                run_params_str,
                zoom=True)

            log('Drawing non_expert_sampling precision-recall graph...')
            # Full crowd vs. progressively smaller crowd samples
            # (33%, 10%, 5%, 2%) and the experts (CI) baseline.
            # NOTE(review): the bare positional 3 and ncol=2 are passed to
            # draw_with_markers -- likely a marker/interval option and the
            # number of legend columns; verify against its definition.
            precision_recall.draw_with_markers(
                [
                    precisions.non_experts, precisions.non_experts_sampled,
                    precisions.non_experts_10, precisions.non_experts_25,
                    precisions.non_experts_1, precisions.ci
                ], [
                    recalls.non_experts, recalls.non_experts_sampled,
                    recalls.non_experts_10, recalls.non_experts_25,
                    recalls.non_experts_1, recalls.ci
                ], [
                    'Crowd', 'Crowd (33% sample)', 'Crowd (10% sample)',
                    'Crowd (5% sample)', 'Crowd (2% sample)', 'Experts (CI)'
                ],
                'precision_recall_non_expert_sampling',
                3,
                run_params_str,
                ncol=2)

            # TODO: Replace with new method.
            log('Drawing mixed model precision-recall graph...')
            # Crowd baseline overlaid with the mixed-model series; unlike the
            # other plots this goes through mixed_model, not precision_recall.
            mixed_model.draw_precision_recall_mixed(precisions.non_experts,
                                                    recalls.non_experts,
                                                    mixed_precisions,
                                                    mixed_recalls,
                                                    run_params_str,
                                                    zoom=True)

            log('Drawing mixed ci model precision-recall graph...')
            # Crowd baseline vs. the mixed-CI series computed earlier
            # (mixed_ci_precisions / mixed_ci_recalls come from outside
            # this section).
            precision_recall.draw(
                [precisions.non_experts, mixed_ci_precisions],
                [recalls.non_experts, mixed_ci_recalls], ['Crowd', 'Mixed'],
                'precision_recall_mixed_ci', run_params_str)

            log('Drawing weighted followers precision-recall graph...')
            # Crowd vs. follower-weighted crowd vs. CI experts.
            precision_recall.draw([
                precisions.non_experts, precisions.weighted_followers,
                precisions.ci
            ], [recalls.non_experts, recalls.weighted_followers, recalls.ci],
                                  ['Crowd', 'Weighted Followers', 'CI'],
                                  'precision_recall_weighted_followers',
                                  run_params_str)

            log('Drawing ci weighted graph...')
            # NOTE(review): this plot labels precisions.population as 'Crowd',
            # while earlier plots use precisions.non_experts for that label --
            # confirm the intended distinction between the two series.
            precision_recall.draw(
                [precisions.population, precisions.ci, precisions.ci_weighted],
                [recalls.population, recalls.ci, recalls.ci_weighted],
                ['Crowd', 'CI', 'CI (Weighted)'],
                'precision_recall_ci_weighted', run_params_str)

            log('Drawing weighted graph...')
            # Whole population vs. its weighted variant.
            precision_recall.draw([precisions.population, precisions.weighted],
                                  [recalls.population, recalls.weighted],
                                  ['Crowd', 'Crowd (Weighted)'],
                                  'precision_recall_weighted', run_params_str)

            log('Drawing weighted both graph...')
            # Population vs. weighted vs. weighted-both variants.
            precision_recall.draw(
                [
                    precisions.population, precisions.weighted,
                    precisions.weighted_both
                ],
                [recalls.population, recalls.weighted, recalls.weighted_both],
                ['Crowd', 'Crowd (Weighted)', 'Crowd (Weighted Both)'],
                'precision_recall_weighted_both', run_params_str)