Example #1
File: nuwiki.py  Project: hexmode/mwlib
    def get_authors_from_template_args(template):
        args = get_template_args(template, expander)

        author_arg = args.get("Author", None)
        if author_arg:
            # userlinks = getUserLinks(author_arg)
            # if userlinks:
            #     return userlinks
            node = uparser.parseString("", raw=args["Author"], wikidb=wikidb)
            advtree.extendClasses(node)
            txt = node.getAllDisplayText().strip()
            if txt:
                return [txt]

        if args.args:
            return getUserLinks("\n".join([args.get(i, u"") for i in range(len(args.args))]))

        return []
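The helper above relies on mwlib's parse-then-extend pattern: raw wikitext is parsed into a node tree with uparser.parseString, advtree.extendClasses attaches the traversal helpers, and getAllDisplayText flattens the rendered text. The following is a minimal sketch of that pattern, not code from hexmode/mwlib; the wikitext string is made up, and the wikidb argument is omitted as in the standalone script further down this page.

# Minimal sketch: parse raw wikitext, extend the node classes, and pull
# the display text, the same steps get_authors_from_template_args applies
# to the Author argument. The wikitext below is invented for illustration.
from mwlib import advtree, uparser

raw = u"[[User:Alice|Alice]] and [[User:Bob|Bob]]"
node = uparser.parseString(u"", raw=raw)
advtree.extendClasses(node)
print node.getAllDisplayText().strip()

When that display text comes back empty, the helper above falls back to collecting user links from the numbered template arguments instead.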
Example #2
# Imports assumed for this snippet (it begins after the original file's
# import block): the script uses sys, urllib2, logging, and mwlib's
# parseString, advtree, Section and Item.
import logging
import sys
import urllib2

from mwlib import advtree
from mwlib.parser import Item, Section
from mwlib.uparser import parseString

logging.basicConfig()

# Check that a date has been specified
if not len(sys.argv) == 2:

    print "Please provide exactly one argument - the title page of the date to extract events from in wikipedia (e.g. January_1)"
    sys.exit(1)

# Gather content from wikipedia
title = sys.argv[1]
page = urllib2.urlopen("http://en.wikipedia.org/w/index.php?action=raw&title=%s" % title).read().decode("utf-8")
tree = parseString(title, page)

# Add some utility methods to the item
advtree.extendClasses(tree)

# Iterate through the items listed in the wiki page
results = {}
for node in tree.allchildren():

    if isinstance(node, Section) and node.children[0].asText().strip() in (u"Events", u"Births", u"Deaths"):

        section = node.children[0].asText().strip().lower().encode("utf-8")

        results[section] = []

        for item in [x.children[0] for x in node.children[1].allchildren() if isinstance(x, Item)]:

            # Extract the year and text from the item
            raw = item.getAllDisplayText().strip()
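The loop above walks the tree through the helpers that advtree.extendClasses attaches. Below is a self-contained sketch of the same Section/Item traversal run against an in-memory wikitext instead of a live Wikipedia fetch; the wikitext and title are made up, and the mwlib.parser import path for Section and Item is an assumption rather than something shown in the snippet.

# Sketch of the Section/Item traversal used by the script above, applied
# to made-up wikitext instead of a fetched page. Assumption: Section and
# Item are importable from mwlib.parser.
from mwlib import advtree
from mwlib.parser import Item, Section
from mwlib.uparser import parseString

raw = u"== Events ==\n* 1901 - Something happened.\n* 1902 - Something else.\n"
tree = parseString(u"Example", raw)
advtree.extendClasses(tree)

for node in tree.allchildren():
    if isinstance(node, Section):
        heading = node.children[0].asText().strip()
        items = [x for x in node.allchildren() if isinstance(x, Item)]
        print heading, len(items)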
Example #3
    def getContributors(self, name, wikidb=None):
        """Return list of image contributors
        
        @param name: image name without namespace (e.g. without "Image:")
        @type name: unicode
        
        @param wikidb: WikiDB instance (optional)
        @type wikidb: object
        
        @returns: list of contributors
        @rtype: [unicode] or None
        """
        
        desc_url = self.getDescriptionURL(name)
        if desc_url is None:
            return None
        
        # Note: We're always guessing the API helper b/c we'll get problems when
        # fetching from en.wp if we should've used commons.wikimedia.org instead.
        # A passed wikidb is only used as a fallback here.
        api_helper = get_api_helper(desc_url)
        if api_helper is None:
            if wikidb is None:
                return None
        else:
            wikidb = WikiDB(api_helper=api_helper)
        
        title = 'Image:%s' % name
        
        raw = wikidb.getRawArticle(title)
        if not raw:
            return None

        expander = Expander(u'', title, wikidb)
        
        
        def getUserLinks(raw):
            def isUserLink(node):
                return isinstance(node, parser.NamespaceLink) and node.namespace == namespace.NS_USER
            
            result = list(set([
                u.target
                for u in uparser.parseString(title,
                    raw=raw,
                    wikidb=wikidb,
                ).filter(isUserLink)
            ]))
            result.sort()
            return result
            
        
        template = find_template(raw, 'Information')
        if template is not None:
            author = get_template_args(template, expander).get('Author', '').strip()
            if author:
                users = getUserLinks(author)
                if users:
                    users = list(set(users))
                    users.sort()
                    return users
                
                node = uparser.parseString('', raw=author, wikidb=wikidb)
                advtree.extendClasses(node)
                return [node.getAllDisplayText()]
        
        users = getUserLinks(raw)
        if users:
            return users
        
        return wikidb.getAuthors(title)
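getContributors is a method of an image-database class that is not shown here, so it cannot be called on its own; the part that can be reproduced in isolation is the user-link extraction its nested getUserLinks performs. A hedged sketch of that filtering step follows, with made-up wikitext and import paths inferred from the names the method references (parser.NamespaceLink, namespace.NS_USER, uparser).

# Sketch of the user-link extraction performed by getUserLinks above:
# parse wikitext and keep the NamespaceLink nodes that point into the
# User namespace. The wikitext is invented; the import paths are
# assumptions based on the names referenced in the method.
from mwlib import namespace, parser, uparser

raw = u"Photo by [[User:Alice|Alice]], retouched by [[User:Bob|Bob]]"

def is_user_link(node):
    return isinstance(node, parser.NamespaceLink) and node.namespace == namespace.NS_USER

tree = uparser.parseString(u"", raw=raw)
users = sorted(set(link.target for link in tree.filter(is_user_link)))
for user in users:
    print user

In the method itself this extraction is tried first on the Information template's Author argument and then on the whole description page, before falling back to wikidb.getAuthors.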