Python lispify 예제들, wikipediabase.lispify.lispify Python 예제들

예제 #1

0

파일 보기

파일: person.py 프로젝트: fakedrake/WikipediaBase

def find_date(symbol, date_type):
    """
    Resolve birth and death dates from infoboxes, or, if it is not found,
    from the first paragraph
    """
    for cls in InfoboxClassifier().classify(symbol):
        ibox_date = InfoboxResolver().resolve_infobox(cls, symbol, date_type)
        if ibox_date is not None:
            return ibox_date

    # TODO: look at categories for dates

    article = get_article(symbol)
    text = article.paragraphs()[0]  # the first paragraph
    for s, e in iter_paren(text, "."):
        paren = text[s:e]

        for ovl in overlay_parse.dates.just_ranges(paren):
            if date_type == 'birth-date':
                return lispify(ovl[0], typecode='yyyymmdd')
            elif date_type == 'death-date':
                return lispify(ovl[1], typecode='yyyymmdd')

        # If there is just one date and we need a birth date, get that
        if date_type == 'birth-date':
            for ovl in overlay_parse.dates.just_dates(paren):
                return lispify(ovl, typecode='yyyymmdd')

예제 #2

0

파일 보기

파일: knowledgebase.py 프로젝트: fakedrake/WikipediaBase

    def synonyms(self, symbol):
        synonyms = set()

        for si in self.synonym_inducers:
            synonyms.update(si.induce(symbol))

        return lispify(synonyms)

예제 #3

0

파일 보기

파일: infobox.py 프로젝트: fakedrake/WikipediaBase

    def resolve_infobox(self, cls, symbol, attr):
        """
        Return the value of the attribute for the article.
        """

        if "\n" in symbol:
            # There are no newlines in article titles
            return None

        if isinstance(attr, LispType):
            typecode, attr = attr.typecode, attr.val
        else:
            typecode, attr = self._typecode, attr

        infoboxes = get_infoboxes(symbol, cls=cls, fetcher=self.fetcher)

        for ibox in infoboxes:
            result = ibox.get(attr)
            if result:
                self.log().info("Found infobox attribute '%s'" % attr)
                assert(isinstance(result, unicode))  # TODO: remove for production

                return lispify(result, typecode=typecode, infobox_attr=attr)

            self.log().warning("Could not find infobox attribute '%s'" % attr)

        self.log().warning("Could not resolve attribute '%s' for '%s' with "
                           "class '%s'", attr, symbol, cls)

예제 #4

0

파일 보기

파일: term.py 프로젝트: fakedrake/WikipediaBase

 def url(self, article, _):
     """
     Note that this url is the wikipedia.org url. NOT the place where
     we got the page.
     """
     # Will also teake care of redirections.
     article = get_article(article)
     url = article.url()
     return lispify(url, typecode='url')

예제 #5

0

파일 보기

파일: term.py 프로젝트: fakedrake/WikipediaBase

    def short_article(self, symbol, _):
        """
        The first paragraph of the article, or if the first paragraph is
        shorter than 350 characters, then returns the first paragraphs such
        that the sum of the rendered characters is at least 350.
        """

        # TODO: check if the first paragraph is shorter than 350 characters
        first_paragraph = get_article(symbol).first_paragraph(keep_html=True)
        return lispify(first_paragraph, typecode='html')

예제 #6

0

파일 보기

파일: term.py 프로젝트: fakedrake/WikipediaBase

    def number(self, article, _):
        """
        True if it is plural.
        """
        # First paragraph refers more often to the symbol itself
        # rather than things related to it.
        txt = get_article(article).first_paragraph()

        nay = sum(map(txt.count, [' is ', ' was ', ' has ']))
        yay = sum(map(txt.count, [' are ', ' were ', ' have ']))

        # inequality because there are many more nays
        return lispify(yay > nay, typecode='calculated')

예제 #7

0

파일 보기

파일: term.py 프로젝트: fakedrake/WikipediaBase

    def proper(self, article, _):
        """
        Get a quick boolean answer based on the symbol text and the
        article text.
        """

        # Blindly copied by the ruby version
        a = re.sub(r"\s*\(.*\)\s*", "", article.replace("_", " "))
        txt = totext(get_article(article).html_source())
        ret = (txt.count(a.lower()) - txt.count(". " + a.lower()) <
               txt.count(a))

        return lispify(ret, typecode='calculated')

예제 #8

0

파일 보기

파일: frontend.py 프로젝트: fakedrake/WikipediaBase

    def get_callable(self, symbol):
        """
        Given a function name return the callable. Keywords should lispify
        the arguments.
        """

        if isinstance(symbol, Symbol):
            return self.resources()[symbol._name]

        if isinstance(symbol, Keyword):
            return lambda *args: lispify(*args, typecode=symbol._name)

        raise TypeError("Could not resolve function %s (type %s)."
                        % (symbol, str(type(symbol))))

예제 #9

0

파일 보기

파일: infobox.py 프로젝트: fakedrake/WikipediaBase

    def attributes(self, cls, symbol):
        """
        Get all infobox attributes
        """

        attributes = []
        infoboxes = get_infoboxes(symbol, cls=cls, fetcher=self.fetcher)

        for ibox in infoboxes:
            for k, v in ibox.markup_parsed_iter():
                rendered = ibox.rendered_attributes().get(k.replace('-', '_'))
                tmp = dict(code=k.upper(), rendered=rendered)
                attributes.append(tmp)

        return lispify(attributes)

예제 #10

0

파일 보기

파일: knowledgebase.py 프로젝트: fakedrake/WikipediaBase

    def get(self, cls, symbol, attr):
        """
        Gets the value of a symbol's attribute.

        :param cls: Wikipedia class of the symbol
        :param symbol: the Wikipedia article
        :param attr: the attribute to get
        :returns: the attribute's value or an error, lispified
        """
        for ar in self.resolvers:
            res = ar.resolve(cls, symbol, attr)
            if res is not None:
                break

        return lispify([res])

예제 #11

0

파일 보기

파일: term.py 프로젝트: fakedrake/WikipediaBase

    def coordinates(self, article, _):
        for ibox in get_infoboxes(article):
            src = ibox.html_source()
            if src is None:
                return None

            xpath = ".//span[@id='coordinates']"
            lat = src.find(xpath + "//span[@class='latitude']")
            lon = src.find(xpath + "//span[@class='longitude']")

            if lat is None or lon is None:
                return None

            nlat = self._dton(totext(lat))
            nlon = self._dton(totext(lon))

            return lispify([nlat, nlon], typecode='coordinates')

예제 #12

0

파일 보기

파일: term.py 프로젝트: fakedrake/WikipediaBase

    def image(self, article, attribute):
        # Make sure we are not getting back a LispType.

        infoboxes = get_infoboxes(article)
        imgs = [ibx.get('image') for ibx in infoboxes]
        if not imgs:
            return None

        img = imgs[0]
        fnam = img.replace(" ", "_")
        if "File:" in img:
            fnam = fnam.split("File:")[1]

        # TODO : is this a temporary fix? investigate what this annotation means
        # see 'Bill Clinton' for an example
        if "{{!}}border" in img:
            fnam = fnam.split("{{!}}border")[0]

        caps = [ibx.get('caption') for ibx in infoboxes]
        caps = filter(lambda x: x, caps)  # remove None values
        return lispify([0, fnam] + ([markup_unlink(caps[0])] if caps else []))

예제 #13

0

파일 보기