Пример #1
0
def run():
  """Produce hit/miss output data for every (delta, category) pair.

  The data set and months are chosen from _SWITCHED, and retweets are looked
  up only when _EXCLUDE_RETWEETS is set. Each enabled option also appends a
  sub-directory to the module-level _OUT_DIR before that directory is created.

  Note: _OUT_DIR is a global that is extended in place with ``+=``, so calling
  this function more than once in the same process appends the suffixes again.
  """
  global _OUT_DIR
  cache = Util.load_cache()
  seeds = Util.load_seeds()

  # Pick the data set together with the months that belong to it.
  if _SWITCHED:
    data_set, months = DataSet.TESTING, _TESTING_SET_MONTHS
    _OUT_DIR += 'switched/'
  else:
    data_set, months = DataSet.TRAINING, _TRAINING_SET_MONTHS

  if _EXCLUDE_RETWEETS:
    retweets = ground_truths.find_retweets(months)
    _OUT_DIR += 'no_retweets/'
  else:
    retweets = set()

  Util.ensure_dir_exist(_OUT_DIR)
  log('Output dir: %s' % _OUT_DIR)

  for delta in _DELTAS:
    for category in _CATEGORIES:
      gt_rankings = ground_truths.get_gt_rankings(
          seeds, data_set, category,
          exclude_tweets_within_delta=_EXCLUDE_TWEETS_WITHIN_DELTA,
          retweets=retweets)
      sort_users_by_tweet_count(months, seeds, cache, delta, category)
      target_news = ground_truths.find_target_news(gt_rankings,
                                                   _SIZE_TOP_NEWS)
      find_hits_and_mises(months, target_news, seeds, cache, delta, category)

  log('Finished outputting data!')
Пример #2
0
def get_all_user_groups(delta=4, category=None):
  """Build every user group used by the analysis for one delta/category.

  Args:
    delta: Forwarded unchanged to the grouping and expert-selection helpers.
    category: Forwarded unchanged to the same helpers (None by default).

  Returns:
    A (groups, d_num_followers) tuple: the populated UserGroups object and
    the second value returned by experts.select_experts_social_bias.
  """
  seeds = Util.load_seeds()

  # Pick the data set together with the months that belong to it.
  if _SWITCHED:
    data_set, months = DataSet.TESTING, _TESTING_SET_MONTHS
  else:
    data_set, months = DataSet.TRAINING, _TRAINING_SET_MONTHS
  retweets = set()
  if _EXCLUDE_RETWEETS:
    retweets = ground_truths.find_retweets(months)

  gt_rankings = ground_truths.get_gt_rankings(
      seeds, data_set, category,
      exclude_tweets_within_delta=_EXCLUDE_TWEETS_WITHIN_DELTA,
      retweets=retweets)
  target_news = ground_truths.find_target_news(gt_rankings, _SIZE_TOP_NEWS)

  groups = UserGroups()

  (num_users, groups.newsaholics,
   groups.active_users,
   groups.common_users) = basic_groups.group_users(delta, category)
  groups.population = groups.newsaholics.union(
      groups.active_users).union(groups.common_users)

  # Only the groups themselves are needed; the returned user count is unused.
  _, groups.even_groups = even_groups.group_users(delta,
                                                  _NUM_GROUPS,
                                                  _SIZE_OF_GROUP_IN_PERCENT,
                                                  category)

  groups.precision = experts.select_experts_precision(
      groups.newsaholics.union(groups.active_users), num_users, delta,
      _SIZE_EXPERTS, category)
  groups.fscore = experts.select_experts_fscore(len(target_news),
                                                num_users,
                                                delta, _SIZE_EXPERTS,
                                                category)
  groups.ci = experts.select_experts_ci(num_users, delta, _SIZE_EXPERTS,
                                        category)
  groups.super_experts = experts.select_super_experts(groups.precision,
                                                      groups.fscore,
                                                      groups.ci)

  groups.ci_hi, groups.ci_li = experts.split_ci_experts_by_followers(groups.ci)

  # Deal the CI experts round-robin into three buckets, in iteration order.
  groups.ci_1 = set()
  groups.ci_2 = set()
  groups.ci_3 = set()
  ci_buckets = (groups.ci_1, groups.ci_2, groups.ci_3)
  for position, ci_expert in enumerate(groups.ci):
    ci_buckets[position % 3].add(ci_expert)

  groups.social_bias, d_num_followers = experts.select_experts_social_bias(
      num_users, _SIZE_EXPERTS)
  groups.all_experts = experts.select_all_experts(groups.precision,
                                                  groups.fscore,
                                                  groups.ci)
  groups.non_experts = groups.population.difference(groups.all_experts)

  # random.sample() no longer accepts a set (deprecated in Python 3.9,
  # TypeError from 3.11), so sample from a list built once.
  non_expert_pool = list(groups.non_experts)
  num_non_experts = len(non_expert_pool)
  size_configured = int(num_non_experts * _NON_EXPERTS_SAMPLE_SIZE)
  size_5_percent = int(num_non_experts * 0.05)
  size_10_percent = int(num_non_experts * 0.10)
  size_2_percent = int(num_non_experts * 0.02)
  # The attribute names are historical: non_experts_25 holds a 5% sample and
  # non_experts_1 holds a 2% sample. They are kept because callers read them.
  groups.non_experts_sampled = set(random.sample(non_expert_pool,
                                                 size_configured))
  groups.non_experts_25 = set(random.sample(non_expert_pool, size_5_percent))
  groups.non_experts_10 = set(random.sample(non_expert_pool, size_10_percent))
  groups.non_experts_1 = set(random.sample(non_expert_pool, size_2_percent))

  return groups, d_num_followers
Пример #3
0
def _url_to_rank(ranking):
  """Map each url of a ranking of (url, count) pairs to its 0-based index."""
  return {url: rank for rank, (url, _count) in enumerate(ranking)}


def _log_top(title, ranking, limit):
  """Log `title`, then the first `limit` (url, count) entries of `ranking`."""
  log(title)
  for i, (url, count) in enumerate(ranking[:limit]):
    log('[%s] %s\t%s' %(i, url.strip(), count))


def _write_user_demographics(path, groups, size_market_unfiltered,
                             target_news):
  """Write the sizes of all user groups and of the target news set to `path`."""
  with open(path, 'w') as output_file:
    output_file.write('Number of Newsaholics: %s\n' % len(groups.newsaholics))
    output_file.write('Number of Active Users: %s\n'
                      % len(groups.active_users))
    output_file.write('Number of Common Users: %s\n'
                      % len(groups.common_users))
    output_file.write('\n')
    output_file.write('Number of Precision Experts: %s\n'
                      % len(groups.precision))
    output_file.write('Number of F-Score Experts: %s\n' % len(groups.fscore))
    output_file.write('Number of CI Experts: %s\n' % len(groups.ci))
    output_file.write('Number of Social Bias Experts: %s\n'
                      % len(groups.social_bias))
    output_file.write('Total number of unique experts: %s\n'
                      % len(groups.all_experts))
    output_file.write('Number of Precision and F-Score Experts: %s\n'
                      % len(groups.precision.intersection(groups.fscore)))
    output_file.write('Number of Precision and CI Experts: %s\n'
                      % len(groups.precision.intersection(groups.ci)))
    output_file.write('Number of F-Score and CI Experts: %s\n'
                      % len(groups.fscore.intersection(groups.ci)))
    output_file.write('Number of Super Experts: %s\n'
                      % len(groups.super_experts))
    output_file.write('\n')
    output_file.write('Number of Users (Total): %s\n'
                      % (len(groups.newsaholics) + len(groups.active_users)
                         + len(groups.common_users)))
    output_file.write('Size of market (unfiltered): %s\n'
                      % size_market_unfiltered)
    output_file.write('\n')
    output_file.write('Total Number of Good News: %s\n' % len(target_news))


def _write_ranking_comparisons(path, gt_rankings, market_url_to_rank,
                               common_url_to_rank, precision_url_to_rank,
                               ci_url_to_rank, fscore_url_to_rank):
  """Write one TSV row per ground-truth url with its rank in each group.

  Columns: url, ground-truth rank, market, inactive crowd, precision, ci,
  fscore. Ranks are 1-based; 0 means the group did not rank that url.
  """
  columns = (market_url_to_rank, common_url_to_rank, precision_url_to_rank,
             ci_url_to_rank, fscore_url_to_rank)
  with open(path, 'w') as out_file:
    for gt_rank, (gt_url, _) in enumerate(gt_rankings):
      # A missing url yields -1 + 1 == 0, the "not ranked" marker.
      group_ranks = [mapping.get(gt_url, -1) + 1 for mapping in columns]
      out_file.write('%s\t%s\t%s\t%s\t%s\t%s\t%s\n'
                     % tuple([gt_url, gt_rank + 1] + group_ranks))


def _write_rankings_tsv(path, ranking, ground_truth_url_to_rank):
  """Write url, count and (rank, ground-truth rank) for each ranking entry.

  Raises KeyError if a ranked url is absent from `ground_truth_url_to_rank`.
  """
  with open(path, 'w') as output_file:
    for rank, (url, count) in enumerate(ranking):
      output_file.write('%s\t%s\t(%s,%s)\n'
                        % (url.strip(), count, rank,
                           ground_truth_url_to_rank[url]))


def _write_values(path, values):
  """Write each item of `values` on its own line to `path`."""
  with open(path, 'w') as out_file:
    for value in values:
      out_file.write('%s\n' % value)


def _draw_precision_recall_graphs(precisions, recalls,
                                  mixed_precisions, mixed_recalls,
                                  mixed_inact_precisions, mixed_inact_recalls,
                                  mixed_ci_precisions, mixed_ci_recalls,
                                  run_params_str):
  """Issue every precision-recall drawing call for one run configuration."""
  log('Drawing summary precision-recall graphs...')
  precision_recall.draw([precisions.newsaholics, precisions.active_users,
                         precisions.common_users, precisions.precision,
                         precisions.fscore, precisions.ci,
                         precisions.super_experts],
                        [recalls.newsaholics, recalls.active_users,
                         recalls.common_users, recalls.precision,
                         recalls.fscore, recalls.ci, recalls.super_experts],
                        ['Newsaholics', 'Active', 'Common', 'Precision',
                         'F-score', 'CI', 'Super Experts'],
                        'precision_recall_all',
                        run_params_str)

  # Draw via old method because it has fancy markings.
  experts.draw_precision_recall_experts(
      precisions.non_experts, recalls.non_experts,
      precisions.precision, recalls.precision,
      precisions.fscore, recalls.fscore,
      precisions.ci, recalls.ci,
      run_params_str)

  # Only the log line remains here; no drawing call follows it.
  log('Drawing experts precision-recall graph...')

  log('Drawing mixed + inact graph...')
  precision_recall.draw_with_markers(
      [precisions.non_experts, precisions.common_users,
       mixed_inact_precisions],
      [recalls.non_experts, recalls.common_users, mixed_inact_recalls],
      ['Crowd', 'Inactive Crowd', 'Mixed + Inactive'],
      'precision_recall_mixed_and_inactive',
      3, run_params_str, zoom=True)

  log('Drawing ci breakdown by followers precisions-recall graph...')
  precision_recall.draw([precisions.non_experts, precisions.ci,
                         precisions.ci_hi, precisions.ci_li],
                        [recalls.non_experts, recalls.ci,
                         recalls.ci_hi, recalls.ci_li],
                        ['Crowd', 'CI', 'CI High', 'CI Low'],
                        'precision_recall_ci_followers_breakdown',
                        run_params_str)

  log('Drawing social bias precision-recall graph...')
  precision_recall.draw([precisions.non_experts, precisions.social_bias,
                         precisions.precision, precisions.fscore,
                         precisions.ci],
                        [recalls.non_experts, recalls.social_bias,
                         recalls.precision, recalls.fscore,
                         recalls.ci],
                        ['Crowd', 'Influence Experts', 'Precision',
                         'F-score', 'CI'],
                        'precision_recall_social_bias',
                        run_params_str)

  log('Drawing basic groups precision-recall graph...')
  precision_recall.draw([precisions.newsaholics, precisions.active_users,
                         precisions.common_users],
                        [recalls.newsaholics, recalls.active_users,
                         recalls.common_users],
                        ['Newsaholics', 'Active Users', 'Common Users'],
                        'precision_recall_basic_groups',
                        run_params_str)

  log('Drawing crowd def precision-recall graph...')
  precision_recall.draw([precisions.non_experts, precisions.common_users],
                        [recalls.non_experts, recalls.common_users],
                        ['Crowd', 'Inactive Crowd'],
                        'precision_recall_crowd_def',
                        run_params_str, zoom=True)

  log('Drawing non_expert_sampling precision-recall graph...')
  precision_recall.draw_with_markers(
      [precisions.non_experts, precisions.non_experts_sampled,
       precisions.non_experts_10, precisions.non_experts_25,
       precisions.non_experts_1, precisions.ci],
      [recalls.non_experts, recalls.non_experts_sampled,
       recalls.non_experts_10, recalls.non_experts_25,
       recalls.non_experts_1, recalls.ci],
      ['Crowd', 'Crowd (33% sample)', 'Crowd (10% sample)',
       'Crowd (5% sample)', 'Crowd (2% sample)', 'Experts (CI)'],
      'precision_recall_non_expert_sampling',
      3, run_params_str, ncol=2)

  # TODO: Replace with new method.
  log('Drawing mixed model precision-recall graph...')
  mixed_model.draw_precision_recall_mixed(
      precisions.non_experts, recalls.non_experts,
      mixed_precisions, mixed_recalls,
      run_params_str, zoom=True)

  log('Drawing mixed ci model precision-recall graph...')
  precision_recall.draw([precisions.non_experts, mixed_ci_precisions],
                        [recalls.non_experts, mixed_ci_recalls],
                        ['Crowd', 'Mixed'],
                        'precision_recall_mixed_ci',
                        run_params_str)

  log('Drawing weighted followers precision-recall graph...')
  precision_recall.draw([precisions.non_experts,
                         precisions.weighted_followers, precisions.ci],
                        [recalls.non_experts, recalls.weighted_followers,
                         recalls.ci],
                        ['Crowd', 'Weighted Followers', 'CI'],
                        'precision_recall_weighted_followers',
                        run_params_str)

  log('Drawing ci weighted graph...')
  precision_recall.draw([precisions.population, precisions.ci,
                         precisions.ci_weighted],
                        [recalls.population, recalls.ci, recalls.ci_weighted],
                        ['Crowd', 'CI', 'CI (Weighted)'],
                        'precision_recall_ci_weighted',
                        run_params_str)

  log('Drawing weighted graph...')
  precision_recall.draw([precisions.population, precisions.weighted],
                        [recalls.population, recalls.weighted],
                        ['Crowd', 'Crowd (Weighted)'],
                        'precision_recall_weighted',
                        run_params_str)

  log('Drawing weighted both graph...')
  precision_recall.draw([precisions.population, precisions.weighted,
                         precisions.weighted_both],
                        [recalls.population, recalls.weighted,
                         recalls.weighted_both],
                        ['Crowd', 'Crowd (Weighted)',
                         'Crowd (Weighted Both)'],
                        'precision_recall_weighted_both',
                        run_params_str)


def run():
  """Contains the main logic for this analysis.

  For every category in _CATEGORIES the ground-truth rankings and target news
  are computed once. Then, for every delta in _DELTAS, the user groups and
  their rankings are built, summary information is logged, ranking and
  precision files are written, and the precision-recall graphs are drawn.

  Files are written under '../graph/FolkWisdom/<run params>/info/' and
  '../data/FolkWisdom/'; '../data/FolkWisdom/size_of_market_unfiltered.txt'
  is read once per delta.
  """
  FileLog.set_log_dir()

  seeds = Util.load_seeds()
  for category in _CATEGORIES:
    log('Preforming analysis for category: %s' % category)
    # Any truthy category uses a fixed 10% cutoff instead of _SIZE_TOP_NEWS.
    size_top_news = .10 if category else _SIZE_TOP_NEWS

    data_set = DataSet.TRAINING if _SWITCHED else DataSet.TESTING
    retweets = set()
    if _EXCLUDE_RETWEETS:
      # NOTE(review): retweets are always looked up for _TESTING_SET_MONTHS,
      # even when _SWITCHED selects the training data set -- confirm intended.
      retweets = ground_truths.find_retweets(_TESTING_SET_MONTHS)
    log('Num retweets to exclude: %s' % len(retweets))
    gt_rankings = ground_truths.get_gt_rankings(
        seeds, data_set, category,
        exclude_tweets_within_delta=_EXCLUDE_TWEETS_WITHIN_DELTA,
        retweets=retweets)
    log('Num ground_truth_rankings: %s' % len(gt_rankings))

    # Format for use later.
    ground_truth_url_to_rank = _url_to_rank(gt_rankings)

    target_news = ground_truths.find_target_news(gt_rankings, size_top_news)
    log('Size target_news: %s' % len(target_news))

    for delta in _DELTAS:
      run_params_str = 'd%s_t%s_e%s_%s' % (delta, int(size_top_news * 100),
                                           int(_SIZE_EXPERTS * 100), category)
      info_output_dir = '../graph/FolkWisdom/%s/info/' % run_params_str
      Util.ensure_dir_exist(info_output_dir)

      groups, d_num_followers = user_groups.get_all_user_groups(delta,
                                                                category)
      log('Num experts (precision): %s' % len(groups.precision))
      log('Num experts (fscore): %s' % len(groups.fscore))
      log('Num experts (ci): %s' % len(groups.ci))
      log('Num Super Experts: %s' %len(groups.super_experts))
      log('Num Social Bias Experts: %s' % len(groups.social_bias))

      log('Finding rankings with an %s hour delta.' % delta)
      ranks = rankings.get_rankings(delta, seeds, groups, category,
                                    d_num_followers)

      # Output some interesting info to file
      with open('../data/FolkWisdom/size_of_market_unfiltered.txt') as in_file:
        size_market_unfiltered = in_file.readline().strip()

      _write_user_demographics(
          '%suser_demographics_%s.txt' % (info_output_dir, run_params_str),
          groups, size_market_unfiltered, target_news)

      _log_top('Ground Truth Top 50', gt_rankings, 50)
      top_five_sections = [
          ('Newsaholic Top 5', ranks.newsaholics),
          ('Active Top 5', ranks.active_users),
          ('Common Top 5', ranks.common_users),
          ('nonexpert Top 5', ranks.non_experts),
          ('Expert (Precision) Top 5', ranks.precision),
          ('Expert (fscore) Top 5', ranks.fscore),
          ('Expert (ci) Top 5', ranks.ci),
          ('Super Expert Top 5', ranks.super_experts),
          ('Social Bias Expert Top 5', ranks.social_bias),
      ]
      for title, ranking in top_five_sections:
        log('-----------------------------------')
        _log_top(title, ranking, 5)

      # "market" here is built from the non-expert ranking.
      market_url_to_rank = _url_to_rank(ranks.non_experts)
      precision_url_to_rank = _url_to_rank(ranks.precision)
      fscore_url_to_rank = _url_to_rank(ranks.fscore)
      ci_url_to_rank = _url_to_rank(ranks.ci)
      ci_1_url_to_rank = _url_to_rank(ranks.ci_1)
      ci_2_url_to_rank = _url_to_rank(ranks.ci_2)
      ci_3_url_to_rank = _url_to_rank(ranks.ci_3)
      common_url_to_rank = _url_to_rank(ranks.common_users)

      precisions, recalls = precision_recall.get_precision_recalls(gt_rankings,
                                                                   ranks)

      mixed_rankings = mixed_model.get_mixed_rankings(
          market_url_to_rank, precisions.non_experts,
          precision_url_to_rank, precisions.precision,
          fscore_url_to_rank, precisions.fscore,
          ci_url_to_rank, precisions.ci,
          ground_truth_url_to_rank)

      mixed_inact_rankings = mixed_model.get_mixed_rankings(
          common_url_to_rank, precisions.common_users,
          precision_url_to_rank, precisions.precision,
          fscore_url_to_rank, precisions.fscore,
          ci_url_to_rank, precisions.ci,
          ground_truth_url_to_rank)

      mixed_ci_rankings = mixed_model.get_mixed_rankings(
          market_url_to_rank, precisions.non_experts,
          ci_1_url_to_rank, precisions.ci_1,
          ci_2_url_to_rank, precisions.ci_2,
          ci_3_url_to_rank, precisions.ci_3,
          ground_truth_url_to_rank)

      mixed_precisions, mixed_recalls = (
          precision_recall.calc_precision_recall(gt_rankings, mixed_rankings))
      mixed_inact_precisions, mixed_inact_recalls = (
          precision_recall.calc_precision_recall(gt_rankings,
                                                 mixed_inact_rankings))
      mixed_ci_precisions, mixed_ci_recalls = (
          precision_recall.calc_precision_recall(gt_rankings,
                                                 mixed_ci_rankings))

      # Unlike the other top lists, this one is 1-based and logs the raw url.
      log('-----------------------------------')
      log('Mixed (min) Top 5')
      for i, (url, count) in enumerate(mixed_rankings[:5]):
        log('[%s] %s\t%s' %(i + 1, url, count))
      log('-----------------------------------')

      _write_ranking_comparisons(
          '%sranking_comparisons_%s.tsv' % (info_output_dir, run_params_str),
          gt_rankings, market_url_to_rank, common_url_to_rank,
          precision_url_to_rank, ci_url_to_rank, fscore_url_to_rank)

      with open('%sground_truth_rankings_%s.tsv'
                % (info_output_dir, run_params_str), 'w') as output_file:
        for url, count in gt_rankings:
          output_file.write('%s\t%s\n' % (url.strip(), count))

      # NOTE(review): 'market_rankings' is written from ranks.common_users,
      # the same source as 'common_user_rankings' -- confirm this is intended.
      ranking_files = [
          ('market_rankings', ranks.common_users),
          ('newsaholic_rankings', ranks.newsaholics),
          ('active_user_rankings', ranks.active_users),
          ('common_user_rankings', ranks.common_users),
          ('nonexpert_user_rankings', ranks.non_experts),
          ('expert_p_user_rankings', ranks.precision),
          ('expert_f_user_rankings', ranks.fscore),
          ('expert_c_user_rankings', ranks.ci),
          ('expert_s_user_rankings', ranks.super_experts),
          ('mixed_rankings', mixed_rankings),
      ]
      for file_stem, ranking in ranking_files:
        _write_rankings_tsv(
            '%s%s_%s.tsv' % (info_output_dir, file_stem, run_params_str),
            ranking, ground_truth_url_to_rank)

      # NOTE(review): 'market_precisions' is written from
      # precisions.common_users -- confirm this is intended.
      precision_files = [
          ('market', precisions.common_users),
          ('nonexpert', precisions.non_experts),
          ('expert_p', precisions.precision),
          ('expert_f', precisions.fscore),
          ('expert_c', precisions.ci),
      ]
      for file_stem, values in precision_files:
        _write_values(
            '../data/FolkWisdom/%s_precisions_%s.txt'
            % (file_stem, run_params_str),
            values)

      _draw_precision_recall_graphs(precisions, recalls,
                                    mixed_precisions, mixed_recalls,
                                    mixed_inact_precisions,
                                    mixed_inact_recalls,
                                    mixed_ci_precisions, mixed_ci_recalls,
                                    run_params_str)
Пример #4
0
def _index_by_url(rankings):
    """Map each url in `rankings` to its 0-based position.

    Args:
      rankings: Sequence of (url, count) pairs.

    Returns:
      Dict from url to index. A url that occurs more than once keeps the
      index of its last occurrence.
    """
    return {url: rank for rank, (url, _) in enumerate(rankings)}


def _one_based_rank(url_to_rank, url):
    """Return the 1-based rank of `url`, or 0 when `url` is not ranked."""
    position = url_to_rank.get(url)
    return 0 if position is None else position + 1


def _log_top_rankings(title, rankings, limit):
    """Log `title` followed by at most `limit` leading entries of `rankings`.

    Args:
      title: Heading logged before the entries.
      rankings: Sequence of (url, count) pairs.
      limit: Maximum number of entries to log.
    """
    log(title)
    for i, (url, count) in enumerate(rankings[:limit]):
        log('[%s] %s\t%s' % (i, url.strip(), count))


def _write_user_demographics(path, groups, size_market_unfiltered,
                             num_target_news):
    """Write the sizes of the user groups and related totals to `path`.

    Args:
      path: File to (over)write.
      groups: Object exposing the user-group collections read below.
      size_market_unfiltered: Value reported as the unfiltered market size.
      num_target_news: Value reported as the total number of good news.
    """
    num_users = (len(groups.newsaholics) + len(groups.active_users) +
                 len(groups.common_users))
    lines = [
        'Number of Newsaholics: %s\n' % len(groups.newsaholics),
        'Number of Active Users: %s\n' % len(groups.active_users),
        'Number of Common Users: %s\n' % len(groups.common_users),
        '\n',
        'Number of Precision Experts: %s\n' % len(groups.precision),
        'Number of F-Score Experts: %s\n' % len(groups.fscore),
        'Number of CI Experts: %s\n' % len(groups.ci),
        'Number of Social Bias Experts: %s\n' % len(groups.social_bias),
        'Total number of unique experts: %s\n' % len(groups.all_experts),
        'Number of Precision and F-Score Experts: %s\n' %
        len(groups.precision.intersection(groups.fscore)),
        'Number of Precision and CI Experts: %s\n' %
        len(groups.precision.intersection(groups.ci)),
        'Number of F-Score and CI Experts: %s\n' %
        len(groups.fscore.intersection(groups.ci)),
        'Number of Super Experts: %s\n' % len(groups.super_experts),
        '\n',
        'Number of Users (Total): %s\n' % num_users,
        'Size of market (unfiltered): %s\n' % size_market_unfiltered,
        '\n',
        'Total Number of Good News: %s\n' % num_target_news,
    ]
    with open(path, 'w') as output_file:
        output_file.writelines(lines)


def _write_ranking_comparisons(path, gt_rankings, market_url_to_rank,
                               common_url_to_rank, precision_url_to_rank,
                               ci_url_to_rank, fscore_url_to_rank):
    """Write one tab-separated row per ground-truth url comparing its ranks.

    Columns are: url, ground-truth rank, then the rank taken from the
    market, common, precision, ci and fscore mappings, in that order. All
    ranks are 1-based; 0 means the url is absent from that mapping.

    Args:
      path: File to (over)write.
      gt_rankings: Sequence of (url, count) pairs in ground-truth order.
      market_url_to_rank: Dict from url to 0-based rank.
      common_url_to_rank: Dict from url to 0-based rank.
      precision_url_to_rank: Dict from url to 0-based rank.
      ci_url_to_rank: Dict from url to 0-based rank.
      fscore_url_to_rank: Dict from url to 0-based rank.
    """
    with open(path, 'w') as out_file:
        for gt_rank, (gt_url, _) in enumerate(gt_rankings):
            out_file.write('%s\t%s\t%s\t%s\t%s\t%s\t%s\n' % (
                gt_url, gt_rank + 1,
                _one_based_rank(market_url_to_rank, gt_url),
                _one_based_rank(common_url_to_rank, gt_url),
                _one_based_rank(precision_url_to_rank, gt_url),
                _one_based_rank(ci_url_to_rank, gt_url),
                _one_based_rank(fscore_url_to_rank, gt_url)))


def _write_rankings(path, rankings, gt_url_to_rank):
    """Write `rankings` to `path`, pairing each rank with its ground truth.

    Each row is "url<TAB>count<TAB>(rank,ground_truth_rank)" with 0-based
    ranks.

    Args:
      path: File to (over)write.
      rankings: Sequence of (url, count) pairs.
      gt_url_to_rank: Dict from url to ground-truth rank.

    Raises:
      KeyError: If a ranked url is missing from `gt_url_to_rank`.
    """
    with open(path, 'w') as output_file:
        for rank, (url, count) in enumerate(rankings):
            output_file.write('%s\t%s\t(%s,%s)\n' %
                              (url.strip(), count, rank,
                               gt_url_to_rank[url]))


def _write_lines(path, values):
    """Write each item of `values` to `path` on its own line."""
    with open(path, 'w') as out_file:
        for value in values:
            out_file.write('%s\n' % value)


def _draw_graphs(precisions, recalls, mixed_precisions, mixed_recalls,
                 mixed_inact_precisions, mixed_inact_recalls,
                 mixed_ci_precisions, mixed_ci_recalls, run_params_str):
    """Draw every precision-recall graph for one parameter combination.

    Args:
      precisions: Object exposing one precision series per user group.
      recalls: Object exposing one recall series per user group.
      mixed_precisions: Precision series of the mixed model.
      mixed_recalls: Recall series of the mixed model.
      mixed_inact_precisions: Precision series of the mixed model built on
        the common-user rankings.
      mixed_inact_recalls: Recall series matching `mixed_inact_precisions`.
      mixed_ci_precisions: Precision series of the mixed model built on the
        ci_1/ci_2/ci_3 rankings.
      mixed_ci_recalls: Recall series matching `mixed_ci_precisions`.
      run_params_str: Parameter tag passed through to the drawing functions.
    """
    log('Drawing summary precision-recall graphs...')
    precision_recall.draw(
        [precisions.newsaholics, precisions.active_users,
         precisions.common_users, precisions.precision, precisions.fscore,
         precisions.ci, precisions.super_experts],
        [recalls.newsaholics, recalls.active_users, recalls.common_users,
         recalls.precision, recalls.fscore, recalls.ci,
         recalls.super_experts],
        ['Newsaholics', 'Active', 'Common', 'Precision', 'F-score', 'CI',
         'Super Experts'],
        'precision_recall_all', run_params_str)

    # Draw via old method because it has fancy markings.
    experts.draw_precision_recall_experts(
        precisions.non_experts, recalls.non_experts,
        precisions.precision, recalls.precision,
        precisions.fscore, recalls.fscore,
        precisions.ci, recalls.ci,
        run_params_str)

    # Message kept for log compatibility; the experts graph itself is drawn
    # by the call just above.
    log('Drawing experts precision-recall graph...')

    log('Drawing mixed + inact graph...')
    precision_recall.draw_with_markers(
        [precisions.non_experts, precisions.common_users,
         mixed_inact_precisions],
        [recalls.non_experts, recalls.common_users, mixed_inact_recalls],
        ['Crowd', 'Inactive Crowd', 'Mixed + Inactive'],
        'precision_recall_mixed_and_inactive',
        3,
        run_params_str,
        zoom=True)

    log('Drawing ci breakdown by followers precisions-recall graph...')
    precision_recall.draw(
        [precisions.non_experts, precisions.ci, precisions.ci_hi,
         precisions.ci_li],
        [recalls.non_experts, recalls.ci, recalls.ci_hi, recalls.ci_li],
        ['Crowd', 'CI', 'CI High', 'CI Low'],
        'precision_recall_ci_followers_breakdown', run_params_str)

    log('Drawing social bias precision-recall graph...')
    precision_recall.draw(
        [precisions.non_experts, precisions.social_bias,
         precisions.precision, precisions.fscore, precisions.ci],
        [recalls.non_experts, recalls.social_bias, recalls.precision,
         recalls.fscore, recalls.ci],
        ['Crowd', 'Influence Experts', 'Precision', 'F-score', 'CI'],
        'precision_recall_social_bias', run_params_str)

    log('Drawing basic groups precision-recall graph...')
    precision_recall.draw(
        [precisions.newsaholics, precisions.active_users,
         precisions.common_users],
        [recalls.newsaholics, recalls.active_users, recalls.common_users],
        ['Newsaholics', 'Active Users', 'Common Users'],
        'precision_recall_basic_groups', run_params_str)

    log('Drawing crowd def precision-recall graph...')
    precision_recall.draw(
        [precisions.non_experts, precisions.common_users],
        [recalls.non_experts, recalls.common_users],
        ['Crowd', 'Inactive Crowd'],
        'precision_recall_crowd_def',
        run_params_str,
        zoom=True)

    log('Drawing non_expert_sampling precision-recall graph...')
    precision_recall.draw_with_markers(
        [precisions.non_experts, precisions.non_experts_sampled,
         precisions.non_experts_10, precisions.non_experts_25,
         precisions.non_experts_1, precisions.ci],
        [recalls.non_experts, recalls.non_experts_sampled,
         recalls.non_experts_10, recalls.non_experts_25,
         recalls.non_experts_1, recalls.ci],
        ['Crowd', 'Crowd (33% sample)', 'Crowd (10% sample)',
         'Crowd (5% sample)', 'Crowd (2% sample)', 'Experts (CI)'],
        'precision_recall_non_expert_sampling',
        3,
        run_params_str,
        ncol=2)

    # TODO: Replace with new method.
    log('Drawing mixed model precision-recall graph...')
    mixed_model.draw_precision_recall_mixed(precisions.non_experts,
                                            recalls.non_experts,
                                            mixed_precisions,
                                            mixed_recalls,
                                            run_params_str,
                                            zoom=True)

    log('Drawing mixed ci model precision-recall graph...')
    precision_recall.draw(
        [precisions.non_experts, mixed_ci_precisions],
        [recalls.non_experts, mixed_ci_recalls],
        ['Crowd', 'Mixed'],
        'precision_recall_mixed_ci', run_params_str)

    log('Drawing weighted followers precision-recall graph...')
    precision_recall.draw(
        [precisions.non_experts, precisions.weighted_followers,
         precisions.ci],
        [recalls.non_experts, recalls.weighted_followers, recalls.ci],
        ['Crowd', 'Weighted Followers', 'CI'],
        'precision_recall_weighted_followers', run_params_str)

    log('Drawing ci weighted graph...')
    precision_recall.draw(
        [precisions.population, precisions.ci, precisions.ci_weighted],
        [recalls.population, recalls.ci, recalls.ci_weighted],
        ['Crowd', 'CI', 'CI (Weighted)'],
        'precision_recall_ci_weighted', run_params_str)

    log('Drawing weighted graph...')
    precision_recall.draw(
        [precisions.population, precisions.weighted],
        [recalls.population, recalls.weighted],
        ['Crowd', 'Crowd (Weighted)'],
        'precision_recall_weighted', run_params_str)

    log('Drawing weighted both graph...')
    precision_recall.draw(
        [precisions.population, precisions.weighted,
         precisions.weighted_both],
        [recalls.population, recalls.weighted, recalls.weighted_both],
        ['Crowd', 'Crowd (Weighted)', 'Crowd (Weighted Both)'],
        'precision_recall_weighted_both', run_params_str)


def run():
    """Contains the main logic for this analysis.

    For every category in _CATEGORIES the ground-truth rankings and target
    news are computed once. Then, for every delta in _DELTAS, the user
    groups and their rankings are fetched, summary files are written under
    '../graph/FolkWisdom/<params>/info/' and '../data/FolkWisdom/', the top
    of each ranking is logged, and the precision-recall graphs are drawn.
    """
    FileLog.set_log_dir()

    seeds = Util.load_seeds()
    separator = '-----------------------------------'
    for category in _CATEGORIES:
        log('Preforming analysis for category: %s' % category)
        # Any truthy category overrides the configured top-news size.
        size_top_news = .10 if category else _SIZE_TOP_NEWS

        data_set = DataSet.TRAINING if _SWITCHED else DataSet.TESTING
        retweets = set()
        if _EXCLUDE_RETWEETS:
            # NOTE(review): retweets are always looked up over
            # _TESTING_SET_MONTHS, even when _SWITCHED selects the training
            # data set -- confirm this is intended.
            retweets = ground_truths.find_retweets(_TESTING_SET_MONTHS)
        log('Num retweets to exclude: %s' % len(retweets))
        gt_rankings = ground_truths.get_gt_rankings(
            seeds,
            data_set,
            category,
            exclude_tweets_within_delta=_EXCLUDE_TWEETS_WITHIN_DELTA,
            retweets=retweets)
        log('Num ground_truth_rankings: %s' % len(gt_rankings))

        # Format for use later.
        ground_truth_url_to_rank = _index_by_url(gt_rankings)

        target_news = ground_truths.find_target_news(gt_rankings,
                                                     size_top_news)
        log('Size target_news: %s' % len(target_news))

        for delta in _DELTAS:
            run_params_str = 'd%s_t%s_e%s_%s' % (
                delta, int(size_top_news * 100), int(_SIZE_EXPERTS * 100),
                category)
            info_output_dir = '../graph/FolkWisdom/%s/info/' % run_params_str
            Util.ensure_dir_exist(info_output_dir)

            groups, d_num_followers = user_groups.get_all_user_groups(
                delta, category)
            log('Num experts (precision): %s' % len(groups.precision))
            log('Num experts (fscore): %s' % len(groups.fscore))
            log('Num experts (ci): %s' % len(groups.ci))
            log('Num Super Experts: %s' % len(groups.super_experts))
            log('Num Social Bias Experts: %s' % len(groups.social_bias))

            log('Finding rankings with an %s hour delta.' % delta)
            ranks = rankings.get_rankings(delta, seeds, groups, category,
                                          d_num_followers)

            # Output some interesting info to file
            with open('../data/FolkWisdom/size_of_market_unfiltered.txt'
                      ) as in_file:
                size_market_unfiltered = in_file.readline().strip()

            _write_user_demographics(
                '%suser_demographics_%s.txt' %
                (info_output_dir, run_params_str),
                groups, size_market_unfiltered, len(target_news))

            top_listings = [
                ('Ground Truth Top 50', gt_rankings, 50),
                ('Newsaholic Top 5', ranks.newsaholics, 5),
                ('Active Top 5', ranks.active_users, 5),
                ('Common Top 5', ranks.common_users, 5),
                ('nonexpert Top 5', ranks.non_experts, 5),
                ('Expert (Precision) Top 5', ranks.precision, 5),
                ('Expert (fscore) Top 5', ranks.fscore, 5),
                ('Expert (ci) Top 5', ranks.ci, 5),
                ('Super Expert Top 5', ranks.super_experts, 5),
                ('Social Bias Expert Top 5', ranks.social_bias, 5),
            ]
            for position, (title, listing, limit) in enumerate(top_listings):
                # Separator goes between listings, not before the first one.
                if position:
                    log(separator)
                _log_top_rankings(title, listing, limit)

            # "market" here is the non-expert crowd.
            market_url_to_rank = _index_by_url(ranks.non_experts)
            precision_url_to_rank = _index_by_url(ranks.precision)
            fscore_url_to_rank = _index_by_url(ranks.fscore)
            ci_url_to_rank = _index_by_url(ranks.ci)
            ci_1_url_to_rank = _index_by_url(ranks.ci_1)
            ci_2_url_to_rank = _index_by_url(ranks.ci_2)
            ci_3_url_to_rank = _index_by_url(ranks.ci_3)
            common_url_to_rank = _index_by_url(ranks.common_users)

            precisions, recalls = precision_recall.get_precision_recalls(
                gt_rankings, ranks)

            mixed_rankings = mixed_model.get_mixed_rankings(
                market_url_to_rank, precisions.non_experts,
                precision_url_to_rank, precisions.precision,
                fscore_url_to_rank, precisions.fscore,
                ci_url_to_rank, precisions.ci,
                ground_truth_url_to_rank)

            mixed_inact_rankings = mixed_model.get_mixed_rankings(
                common_url_to_rank, precisions.common_users,
                precision_url_to_rank, precisions.precision,
                fscore_url_to_rank, precisions.fscore,
                ci_url_to_rank, precisions.ci,
                ground_truth_url_to_rank)

            mixed_ci_rankings = mixed_model.get_mixed_rankings(
                market_url_to_rank, precisions.non_experts,
                ci_1_url_to_rank, precisions.ci_1,
                ci_2_url_to_rank, precisions.ci_2,
                ci_3_url_to_rank, precisions.ci_3,
                ground_truth_url_to_rank)

            mixed_precisions, mixed_recalls = (
                precision_recall.calc_precision_recall(gt_rankings,
                                                       mixed_rankings))
            mixed_inact_precisions, mixed_inact_recalls = (
                precision_recall.calc_precision_recall(gt_rankings,
                                                       mixed_inact_rankings))
            mixed_ci_precisions, mixed_ci_recalls = (
                precision_recall.calc_precision_recall(gt_rankings,
                                                       mixed_ci_rankings))

            # Unlike the listings above, this one is numbered from 1 and the
            # url is logged without stripping.
            log(separator)
            log('Mixed (min) Top 5')
            for i, (url, count) in enumerate(mixed_rankings[:5]):
                log('[%s] %s\t%s' % (i + 1, url, count))
            log(separator)

            _write_ranking_comparisons(
                '%sranking_comparisons_%s.tsv' %
                (info_output_dir, run_params_str),
                gt_rankings, market_url_to_rank, common_url_to_rank,
                precision_url_to_rank, ci_url_to_rank, fscore_url_to_rank)

            with open(
                    '%sground_truth_rankings_%s.tsv' %
                    (info_output_dir, run_params_str), 'w') as output_file:
                for url, count in gt_rankings:
                    output_file.write('%s\t%s\n' % (url.strip(), count))

            # NOTE(review): the "market" file is written from
            # ranks.common_users, the same data as the "common_user" file --
            # confirm this is intended.
            ranking_files = [
                ('market', ranks.common_users),
                ('newsaholic', ranks.newsaholics),
                ('active_user', ranks.active_users),
                ('common_user', ranks.common_users),
                ('nonexpert_user', ranks.non_experts),
                ('expert_p_user', ranks.precision),
                ('expert_f_user', ranks.fscore),
                ('expert_c_user', ranks.ci),
                ('expert_s_user', ranks.super_experts),
                ('mixed', mixed_rankings),
            ]
            for stem, listing in ranking_files:
                _write_rankings(
                    '%s%s_rankings_%s.tsv' %
                    (info_output_dir, stem, run_params_str),
                    listing, ground_truth_url_to_rank)

            # NOTE(review): "market" precisions likewise come from
            # precisions.common_users -- confirm this is intended.
            precision_files = [
                ('market', precisions.common_users),
                ('nonexpert', precisions.non_experts),
                ('expert_p', precisions.precision),
                ('expert_f', precisions.fscore),
                ('expert_c', precisions.ci),
            ]
            for stem, values in precision_files:
                _write_lines(
                    '../data/FolkWisdom/%s_precisions_%s.txt' %
                    (stem, run_params_str), values)

            _draw_graphs(precisions, recalls,
                         mixed_precisions, mixed_recalls,
                         mixed_inact_precisions, mixed_inact_recalls,
                         mixed_ci_precisions, mixed_ci_recalls,
                         run_params_str)