Example #1
from ranking import Ranking, ORDINAL  # assumed import; PyPI "ranking" package
def rank(self):
    # Total points use the ORDINAL strategy (ties are broken by sort order);
    # the category ranks below use the library's default COMPETITION strategy,
    # so tied scores share a rank (see the sketch after this example).
    competitors = self.competitors.filter(
        is_private=False,
        status__gt=0,
    ).distinct().order_by('-tot_points')
    points = [x.tot_points for x in competitors]
    ranked = Ranking(points, strategy=ORDINAL, start=1)
    for competitor in competitors:
        competitor.tot_rank = ranked.rank(competitor.tot_points)
        competitor.save()
    competitors = self.competitors.filter(
        is_private=False,
        status__gt=0,
    ).distinct().order_by('-mus_points')
    points = [x.mus_points for x in competitors]
    ranked = Ranking(points, start=1)
    for competitor in competitors:
        competitor.mus_rank = ranked.rank(competitor.mus_points)
        competitor.save()
    competitors = self.competitors.filter(
        is_private=False,
        status__gt=0,
    ).distinct().order_by('-per_points')
    points = [x.per_points for x in competitors]
    ranked = Ranking(points, start=1)
    for competitor in competitors:
        competitor.per_rank = ranked.rank(competitor.per_points)
        competitor.save()
    competitors = self.competitors.filter(
        is_private=False,
        status__gt=0,
    ).distinct().order_by('-sng_points')
    points = [x.sng_points for x in competitors]
    ranked = Ranking(points, start=1)
    for competitor in competitors:
        competitor.sng_rank = ranked.rank(competitor.sng_points)
        competitor.save()
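Examples #1 and #5 rely on the PyPI "ranking" package. Below is a minimal sketch of the API they appear to assume; scores must already be sorted in descending order:

from ranking import Ranking, ORDINAL

scores = [100, 90, 90, 80]  # pre-sorted descending, as Ranking requires

# Default COMPETITION strategy: tied scores share a rank ("1-2-2-4").
print(list(Ranking(scores, start=1)))
# [(1, 100), (2, 90), (2, 90), (4, 80)]

# ORDINAL strategy: every item gets a distinct rank; ties break by position.
print(list(Ranking(scores, strategy=ORDINAL, start=1)))
# [(1, 100), (2, 90), (3, 90), (4, 80)]

# rank(score) looks up the rank of a given score, as the examples use it.
print(Ranking(scores, start=1).rank(90))  # 2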
Example #2
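# Fragment of a larger evaluation loop; the surrounding code is not shown.
# It assumes numpy (np), sys, and ranking.Ranking are imported, and that
# X_all, X_left, getFinalClone, meta_dict, idx1, idx2, r, instance, lines,
# N_BATCH, BATCH_SIZE, MERGED_SIZE, llist, and rlist are defined earlier.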
    X_repeat = np.repeat(X_all[idx1, :MERGED_SIZE // 2].reshape((1, MERGED_SIZE // 2)), X_all.shape[0], axis=0)
    X_right = np.concatenate((X_repeat, X_all[:, :MERGED_SIZE // 2]), axis=1)

    X_rel = np.ones((X_all.shape[0], 1))
    X_rel.fill(meta_dict['dicts']['rel_idx'][r])

    # Score one random batch of candidate pairs on each side, plus the correct pair.
    b = np.random.choice(N_BATCH)
    scores_left = getFinalClone(X_left[b * BATCH_SIZE:(b + 1) * BATCH_SIZE], X_rel[b * BATCH_SIZE:(b + 1) * BATCH_SIZE])
    scores_right = getFinalClone(X_right[b * BATCH_SIZE:(b + 1) * BATCH_SIZE], X_rel[b * BATCH_SIZE:(b + 1) * BATCH_SIZE])

    score_correct = getFinalClone(
        np.concatenate((X_all[idx1, :MERGED_SIZE // 2].reshape((1, MERGED_SIZE // 2)),
                        X_all[idx2, :MERGED_SIZE // 2].reshape((1, MERGED_SIZE // 2))), axis=1),
        np.array(meta_dict['dicts']['rel_idx'][r]).reshape((1, 1)))

    if instance % 10000 == 0:
        print("instance %d/%d " % (instance + 1, len(lines)), file=sys.stderr)

    # Append the correct pair's score to each candidate list, then find its rank.
    l_list = scores_left.reshape((scores_left.shape[0],)).tolist() + score_correct.tolist()[0]
    r_list = scores_right.reshape((scores_right.shape[0],)).tolist() + score_correct.tolist()[0]

    r_left = Ranking(sorted(l_list, reverse=True))
    r_right = Ranking(sorted(r_list, reverse=True))
    left_rank = r_left.rank(score_correct[0][0])
    right_rank = r_right.rank(score_correct[0][0])
    llist += [left_rank]
    rlist += [right_rank]
print(sys.argv[1], ':', np.mean(llist), np.mean(rlist))

Example #3
def compute_p_value(self, test_statistic, null_distribution):
    # Empirical p-value: the statistic's rank in the pooled values sorted
    # descending, over the pooled size. Ranking's default start=0 means a
    # statistic above every null draw yields p = 0. Assumes numpy (np) and
    # ranking.Ranking are imported.
    dist = np.append(null_distribution, test_statistic)
    dist.sort()
    r = Ranking(dist[::-1])
    return r.rank(test_statistic) / float(len(dist))
Example #4
def compute_p_value(self, test_statistic, null_distribution):
    dist = np.append(null_distribution, test_statistic)
    dist.sort()
    r = Ranking(dist[::-1])
    return r.rank(test_statistic) / float(len(dist))
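A hypothetical call for Examples #3 and #4; t stands in for whatever object defines compute_p_value, and the numbers are made up to show the arithmetic:

import numpy as np

null = np.array([0.2, 0.5, 0.9, 1.1])
stat = 0.8
# Pooled and sorted descending: [1.1, 0.9, 0.8, 0.5, 0.2], so the default
# start=0 gives rank(0.8) == 2 and p = 2 / 5 = 0.4.
p = t.compute_p_value(stat, null)  # t is a hypothetical instance
print(p)  # 0.4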
Example #5
from django.apps import apps           # assumed imports for this excerpt
from ranking import Ranking, ORDINAL
def rank(self):
    appearances = self.appearances.filter(
        competitor__is_private=False,
        competitor__status__gt=0,
    ).distinct().order_by('-tot_points')
    points = [x.tot_points for x in appearances]
    ranked = Ranking(points, strategy=ORDINAL, start=1)
    for appearance in appearances:
        appearance.tot_rank = ranked.rank(appearance.tot_points)
        appearance.save()
    appearances = self.appearances.filter(
        competitor__is_private=False,
        competitor__status__gt=0,
    ).distinct().order_by('-mus_points')
    points = [x.mus_points for x in appearances]
    ranked = Ranking(points, start=1)
    for appearance in appearances:
        appearance.mus_rank = ranked.rank(appearance.mus_points)
        appearance.save()
    appearances = self.appearances.filter(
        competitor__is_private=False,
        competitor__status__gt=0,
    ).distinct().order_by('-per_points')
    points = [x.per_points for x in appearances]
    ranked = Ranking(points, start=1)
    for appearance in appearances:
        appearance.per_rank = ranked.rank(appearance.per_points)
        appearance.save()
    appearances = self.appearances.filter(
        competitor__is_private=False,
        competitor__status__gt=0,
    ).distinct().order_by('-sng_points')
    points = [x.sng_points for x in appearances]
    ranked = Ranking(points, start=1)
    for appearance in appearances:
        appearance.sng_rank = ranked.rank(appearance.sng_points)
        appearance.save()
    # Songs ranked relative to Round
    Song = apps.get_model('api.song')
    songs = Song.objects.filter(
        appearance__round=self,
        appearance__competitor__is_private=False,
        appearance__competitor__status__gt=0,
    ).distinct().order_by('-tot_points')
    points = [x.tot_points for x in songs]
    ranked = Ranking(points, strategy=ORDINAL, start=1)
    for song in songs:
        song.tot_rank = ranked.rank(song.tot_points)
        song.save()
    songs = Song.objects.filter(
        appearance__round=self,
        appearance__competitor__is_private=False,
        appearance__competitor__status__gt=0,
    ).distinct().order_by('-mus_points')
    points = [x.mus_points for x in songs]
    ranked = Ranking(points, start=1)
    for song in songs:
        song.mus_rank = ranked.rank(song.mus_points)
        song.save()
    songs = Song.objects.filter(
        appearance__round=self,
        appearance__competitor__is_private=False,
        appearance__competitor__status__gt=0,
    ).distinct().order_by('-per_points')
    points = [x.per_points for x in songs]
    ranked = Ranking(points, start=1)
    for song in songs:
        song.per_rank = ranked.rank(song.per_points)
        song.save()
    songs = Song.objects.filter(
        appearance__round=self,
        appearance__competitor__is_private=False,
        appearance__competitor__status__gt=0,
    ).distinct().order_by('-sng_points')
    points = [x.sng_points for x in songs]
    ranked = Ranking(points, start=1)
    for song in songs:
        song.sng_rank = ranked.rank(song.sng_points)
        song.save()
Example #6
def getRank():
    # Flask view excerpt; mongo, twitterGraph, topicInfluence, moi,
    # twitterFetch, ldamodelInstance, and pandas (pd) are assumed to be
    # set up at module level.
    topic_results = mongo.topicCollection.find()
    queries = list(topic_results)
    pending_topics = []
    rank_list = []
    final_ranks = []
    df = pd.read_csv("./handles.csv")
    screen_names = df.values.tolist()

    candidates = []

    for screen_name in screen_names:
        res = twitterGraph.fetch_user(screen_name[1])
        candidates.append({
            'id': res.id,
            'screen_name': screen_name[1],
            'name': screen_name[0],
            # 'topic_relevance' is filled in per query below
            'image_url': res.profile_image_url
        })

    ranks = topicInfluence.compute_rank(
        twitterFetch["max_tweets"],
        [candidate['id'] for candidate in candidates])
    for i, rank in enumerate(ranks):
        candidates[i] = dict(candidates[i], **rank)
        candidates[i]["moiScore"] = moi.fetch_moi_score(
            candidates[i]['id'], twitterFetch["max_tweets"])

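    # NOTE: this Ranking appears to be a project-local, DataFrame-backed
    # class (it accepts filters and weightages and exposes .dataframe), not
    # the PyPI "ranking" package used in the earlier examples.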
    ranking = Ranking(candidates, filters=['influence', 'moiScore'])

    weightages = {'influence': 0.5, 'moiScore': 0.5}

    ranking.rank(weightages)

    influenceRanks = ranking.dataframe.to_dict(orient='records')[:5]
    print("Influence ranks")
    print(influenceRanks)
    weightages = {
        'influence': 0.125,
        'moiScore': 0.125,
        'topic_relevance': 0.75
    }
    for query_dict in queries:
        query = query_dict['name']
        if not query_dict['isPresent']:
            pending_topics.append(query_dict['name'])
            for candidate in candidates:
                candidate["topic_relevance"] = 0
        else:
            for candidate in candidates:
                candidateTweets = twitterGraph.fetch_preprocessed_tweets(
                    candidate['id'], twitterFetch["max_tweets"])
                print(candidate["screen_name"])
                candidate["topic_relevance"] = ldamodelInstance.getTopicDistFromQuery(
                    candidateTweets, query)

        ranking = Ranking(candidates)
        ranking.rank(weightages)
        rank_list = ranking.dataframe.to_dict(orient='records')[:5]

        final_ranks.append({'query': query, 'rank_list': rank_list})
    return render_template("form.html",
                           final_ranks=final_ranks,
                           pending_topics=pending_topics,
                           influenceRanks=influenceRanks)
Example #7
import heapq
import os

from url_normalize import url_normalize  # PyPI package: url-normalize

# Search_APIs and Ranking here are project-local modules assumed importable.
class Discovery:
    def __init__(self, seed_file, data_dir, similarity_method):
        if not os.path.exists(data_dir):
            os.makedirs(data_dir)
        self.ranked_urls_file = os.path.join(data_dir, "ranked_urls.csv")

        self.seed_urls = self._read_urls_from_file(seed_file)
        print "Number of seed urls: ", len(self.seed_urls)
        self.discovered_urls = set(self.seed_urls)

        self.searcher = Search_APIs(data_dir)
        self.ranker = Ranking(data_dir, self.seed_urls, similarity_method)
        self.seed_threshold = 0.1  # minimum score for a url to be selected as a seed
        self.search_threshold = 0.05  # minimum score for a url to be selected for search

    def discover_with_backlink_search(self):
        '''url discovery using moz backlink search'''
        # NOTE: as written this mirrors discover_with_related_search below;
        # both call searcher.search_related on each seed.

        # Negate scores so the min-heap behaves as a max-heap.
        next_urls = [(-1.0, url) for url in self.seed_urls]
        heapq.heapify(next_urls)  # priority queue of (negated score, url)
        new_discovered_urls = []  # urls with relevant scores
        ranked_urls = []  # discovered urls with ranking scores

        while next_urls:
            seed = heapq.heappop(next_urls)[1]
            results = self.searcher.search_related(seed)
            for url in results:
                if url not in self.discovered_urls:
                    new_discovered_urls.append(url)
                    self.discovered_urls.add(url)

            print("Seed:", seed, "Retrieved", len(results), "related urls")

            # Rank the discovered urls
            new_seed_urls = []
            if new_discovered_urls:
                new_ranked_urls = self.ranker.rank(new_discovered_urls)
                self._save_ranked_urls(new_ranked_urls)
                ranked_urls.extend(new_ranked_urls)
                for url, score in new_ranked_urls:
                    if score > self.seed_threshold:
                        new_seed_urls.append(url)
                    if score > self.search_threshold:
                        heapq.heappush(next_urls, (-score, url))
                new_discovered_urls = []
                for url, score in new_ranked_urls:
                    print(url, score)
            self.ranker.update_seeds(new_seed_urls)

            if len(self.discovered_urls) > 300:
                break

    def discover_with_related_search(self):
        '''url discovery using google related search'''

        # Negate scores so the min-heap behaves as a max-heap.
        next_urls = [(-1.0, url) for url in self.seed_urls]
        heapq.heapify(next_urls)  # priority queue of (negated score, url)
        new_discovered_urls = []  # urls with relevant scores
        ranked_urls = []  # discovered urls with ranking scores

        while next_urls:
            seed = heapq.heappop(next_urls)[1]
            results = self.searcher.search_related(seed)
            for url in results:
                if url not in self.discovered_urls:
                    new_discovered_urls.append(url)
                    self.discovered_urls.add(url)

            print("Seed:", seed, "Retrieved", len(results), "related urls")

            # Rank the discovered urls
            new_seed_urls = []
            if new_discovered_urls:
                new_ranked_urls = self.ranker.rank(new_discovered_urls)
                self._save_ranked_urls(new_ranked_urls)
                ranked_urls.extend(new_ranked_urls)
                for url, score in new_ranked_urls:
                    if score > self.seed_threshold:
                        new_seed_urls.append(url)
                    if score > self.search_threshold:
                        heapq.heappush(next_urls, (-score, url))
                new_discovered_urls = []
                for url, score in new_ranked_urls:
                    print(url, score)
            self.ranker.update_seeds(new_seed_urls)

            if len(self.discovered_urls) > 300:
                break

    def _save_ranked_urls(self, urls):
        with open(self.ranked_urls_file, "a+") as out:
            for url, score in urls:
                out.write(str(score) + " " + url + "\n")

    def test_discover_with_related_search(self):
        '''A simple discovery round using related search'''

        # Discover related urls
        related_urls = []  # results from related search
        for seed in self.seed_urls:
            results = self.searcher.search_related(seed)
            #time.sleep(5)
            if results:
                for url in results:
                    if url not in self.discovered_urls:
                        related_urls.append(url)
                        self.discovered_urls.add(url)

        print "Retrieved ", len(related_urls), " related urls"

        # Rank the discovered urls
        ranked_urls = self.ranker.rank(related_urls)
        for url, score in ranked_urls:
            print(url, score)

    def _read_urls_from_file(self, filepath):
        urls = []
        with open(filepath) as lines:
            for line in lines:
                url = url_normalize(line.strip())
                urls.append(url)
        return urls
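A hypothetical driver for Example #7; the seed file name, data directory, and similarity method below are illustrative assumptions:

if __name__ == "__main__":
    discovery = Discovery("seeds.txt", "data", "cosine")
    discovery.test_discover_with_related_search()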