Example No. 1
def test1():

    catalogs = Catalog.list()
    print "Catalogs: " , catalogs

    master = Catalog.create("master", "Computer/Home/Desktop")

    AttributeType.create("Description", "String")
    AttributeType.create("Status", "String")
    AttributeType.create("Processor", "category_relationship")

    print AttributeType.list()

    computer = master.get_category("Computer")
    computer.add_attribute("Status")
    computer.add_attribute("Processor")

    master.create_categories("Processors/Intel")
    Product.create("Core Duo i5", dict(Description="i5 processor"), master, "Processors/Intel")
    Product.create("Core Duo i7", dict(Description="i7 processor"), master, "Processors/Intel")

    desktop = master.get_category("Computer/Home/Desktop")
    desktop.add_attribute("Description")
    print desktop.get_attributes()

    Product.create("Multimedia Server 1", dict(Description="Multimedia Server for video editing", Status="Active", Processor="Processors/Intel"), master, "Computer/Home/Desktop")
    Product.create("Multimedia Server 2", dict(Description="Multimedia Server for audio editing", Status="Inactive", Processor="Processors/Intel"), master, "Computer/Home/Desktop")
Example No. 2
def __init__(self):
    # LevelDB mapping anchor text to its candidate senses (see get_senses in
    # the next example).
    self.anchors_db = plyvel.DB('catalog/wikipedia-anchors')
    self.catalog = Catalog("catalog")
    self.cache = {}
Example No. 3
class Disambiguator(object):
    def __init__(self):
        self.anchors_db = plyvel.DB('catalog/wikipedia-anchors')
        self.catalog = Catalog("catalog")
        self.cache = {}

    def get_senses(self, spot):
        # Here we convert a list of {'id': page_id, 'score': num_links}
        # to this format [(page_id, score), ..]

        senses = list(loads(self.anchors_db.get(spot.encode('utf8'))))

        # If the first element is an int, it is the stored link_probability; extract it
        if isinstance(senses[0], int):
            link_probability = senses.pop(0)
        else:
            link_probability = 1
        # Then we take the sum of all the scores and extract the probability
        total_score = float(sum(map(lambda x: x[1], senses)))
        senses = sorted(map(lambda x: (x[0], x[1] / total_score), senses), key=lambda x: x[1], reverse=True)
        #senses = map(lambda x: (x[0], 1 / float(len(senses))), senses)


        print "LINKPROB", spot, link_probability, total_score, len(senses)

        # Normalise the link probability into [0, 1]: divide the total score by
        # the largest of the stored probability, the total score and the sense count.
        link_probability = max(link_probability, total_score, len(senses))
        link_probability = total_score / float(link_probability)

        return (link_probability, senses)

    def relatedness(self, id_a, id_b):
        a = self.catalog.get_entity('', id_a)
        b = self.catalog.get_entity('', id_b)

        score = a.related(b)

        self.cache[(id_a, id_b)] = score

        return score

    def vote_for(self, p_a, pg_b, prg_b, scores={}):
        """
        @param p_a the page_id of the sense to give to the spot a
        @param pg_b all the possible page_ids of the senses to give to the spot b
        @param prg_b a list of floats containing the P(p_b | b) probabilities
        @param scores a precomputed {(id_a, id_b): relatedness} mapping (see precompute_related)
        """
        #with profiled("Collaborative voting for " + str(p_a) + " in %s"):
        relatedness = 0
        probabilities = 0

        for p_b, pr_b in zip(pg_b, prg_b):
            if p_b < p_a:
                relatedness += scores.get((p_b, p_a), 0)
            else:
                relatedness += scores.get((p_a, p_b), 0)


            #relatedness += self.relatedness(p_b, p_a)
            probabilities += pr_b


        score = (relatedness * probabilities) / len(pg_b)

        #print "Voting for", p_a, pg_b, score

        return score

    def precompute_related(self, senses):
        """
        @param senses a list of sense-id lists, one per spot
        """
        with profiled("Precomputing scores in %s"):
            scores = {}

            operation = 0
            # Only len(senses) / 2 rotations are performed, so pairs whose spots
            # both fall in the second half of the list are never precomputed;
            # vote_for treats those pairs as 0 via scores.get(..., 0).
            maxoperation = len(senses) / 2

            while operation < maxoperation:
                spot_senses = senses.pop(0)

                for other_senses in senses:
                    for source in spot_senses:
                        for dest in other_senses:
                            a, b = source, dest

                            if a > b:
                                a, b = b, a

                            if (a, b) in scores:
                                continue

                            scores[(a, b)] = self.relatedness(a, b)

                senses.append(spot_senses)
                operation += 1

            return scores


    def disambiguate(self, spots):
        db = self.anchors_db

        allsenses = []

        with profiled("Disambiguation in %s"):
            # First we get all the pages connected to each spot

            pages = defaultdict(list)
            lp = {}

            index = 0
            while index < len(spots):
                spot = spots[index]
                link_prob, senses = self.get_senses(spot)

                allsenses.append(map(lambda x: x[0], senses))

                if link_prob > 0:
                    lp[spot], pages[spot] = link_prob, senses
                    #print "Link probability", spot, link_prob
                    index += 1
                else:
                    del spots[index]
                    print "Removing spot", spot, link_prob

            scores = self.precompute_related(allsenses)

            # Then we remove each spot and append it to the end
            # so we can apply the voting scheme

            candidates = defaultdict(list)

            for spot in spots:
                sense_ids = pages.pop(spot)

                #print "Voting for spot", spot

                for p_a, pr_a in sense_ids:
                    score = 0
                    counter = 0

                    # `pages` maps each other spot to its candidate senses; the
                    # spot key itself is not needed inside the loop.
                    for counter, (other_spot, pg_pr_b) in enumerate(pages.items()):
                        pg_b = map(lambda x: x[0], pg_pr_b)
                        prg_b = map(lambda x: x[1], pg_pr_b)

                        score += self.vote_for(p_a, pg_b, prg_b, scores)

                    score /= float(counter + 1)
                    #print "Vote to spot", spot, p_a, "is", score

                    print "Link probability", lp[spot]

                    rho = (score + lp[spot]) / 2.0
                    candidates[spot].append((pr_a, rho, p_a))

                pages[spot] = sense_ids

            winning = {}

            for spot, champion_list in candidates.items():
                print "Spot", spot, "has the following:"


                for pr_a, score, p_a in sorted(champion_list, reverse=True, key=lambda x: x[1]):
                    #if score < 0.05:
                    #    continue

                    print "\tProability:", pr_a, "Score:", score, "Page id:", p_a, "Title", self.catalog.get_title(p_a)

                _, rho, wiki_id = sorted(champion_list, reverse=True, key=lambda x: x[1])[0]

                print _, rho, wiki_id

                #if rho > 0.05:
                winning[spot] = {
                    "rho": rho,
                    "id": wiki_id,
                    "title": self.catalog.get_title(wiki_id),
                    "portals": self.catalog.get_portals(wiki_id),
                }

            return winning
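
A minimal usage sketch for the class above; the spot strings are illustrative, and the constructor assumes the 'catalog/wikipedia-anchors' LevelDB and the 'catalog' Catalog data already exist on disk:

# Illustrative usage only; the spots are made-up surface forms.
disambiguator = Disambiguator()
winning = disambiguator.disambiguate(["jaguar", "amazon rainforest"])

for spot, result in winning.items():
    # Each entry carries the confidence (rho), the page id, the title and the
    # portals, exactly as assembled at the end of disambiguate() above.
    print spot, result["rho"], result["id"], result["title"]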
Example No. 4
import os
import json
import uuid
from flask import Flask, request, Response, jsonify, send_from_directory
from random import choice

from lib.logger import get_logger

logger = get_logger(LOG_NAME='noplp')

app = Flask(__name__)
app.rounds = {}
app.challenges = {}
datapath = os.path.join("/home/ec2-user/noplp/backend/data")
# datapath = os.path.join(os.getcwd(), "data")
# Catalog comes from a project-local module that is not part of this excerpt.
app.catalog = Catalog(os.path.join(datapath, "list.csv"))
app.current_round_id = None


def add_cors_headers(response):
    response.headers['Access-Control-Allow-Origin'] = '*'
    if request.method == 'OPTIONS':
        response.headers[
            'Access-Control-Allow-Methods'] = 'DELETE, GET, POST, PUT'
        headers = request.headers.get('Access-Control-Request-Headers')
        if headers:
            response.headers['Access-Control-Allow-Headers'] = headers
    return response
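
The excerpt defines add_cors_headers but does not show it being attached to the app; a minimal sketch, assuming Flask's standard after_request hook is the intended wiring (the /ping route is purely illustrative, not from the source):

# Assumed wiring, not shown in the excerpt: apply the CORS handler to every
# response the app produces.
app.after_request(add_cors_headers)


@app.route("/ping")
def ping():
    # Illustrative endpoint only; the real routes are not part of this excerpt.
    return jsonify({"status": "ok"})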

Example No. 5
 def GET(self, path="/"):
     return Catalog.list(path)
Example No. 6
def PUT(self, path):
    # Create a catalog entry at the given path.
    return Catalog.create(path)
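
The two GET/PUT handlers above look like web.py-style resource methods wrapping the Catalog API. The sketch below wires them up under that assumption; the class name CatalogResource, the URL pattern and the import path for Catalog are illustrative, not from the source:

import web

from catalog import Catalog  # hypothetical import path, not from the source


class CatalogResource(object):
    def GET(self, path="/"):
        # List the catalog entries under the requested path.
        return Catalog.list(path)

    def PUT(self, path):
        # Create a catalog entry at the requested path.
        return Catalog.create(path)


# web.py passes the captured URL group as the `path` argument of GET/PUT.
urls = ('/(.*)', 'CatalogResource')
app = web.application(urls, globals())

if __name__ == '__main__':
    app.run()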