Python Wikia示例，pattern.web.Wikia Python示例

示例#1

0

显示文件

def create_poem(inputwoord, min_words=10, max_titles=135):
    """Collect lowercase words from the article titles of a Wikia wiki.

    Walks the wiki's title index (starting at 'a'), looks each title up
    with ``Wikia.search()`` and keeps every word of the article title that
    is purely alphabetic and does not start with an uppercase letter.
    The collected words are printed one per line and then returned.

    Args:
        inputwoord: Wikia domain name, passed on as ``Wikia(domain=...)``.
        min_words: Stop as soon as at least this many words were collected.
        max_titles: Upper bound on the number of index titles that are
            looked up, so the function always terminates even when the
            wiki yields too few qualifying words.

    Returns:
        list: The collected words in the order they were found. It can
        hold fewer than ``min_words`` items when the index or
        ``max_titles`` runs out first.
    """
    w = Wikia(domain=inputwoord)
    wordlist = []

    print("start")
    # One pass over the index: every title is visited at most once, so no
    # word is counted again merely because an earlier title was re-scanned.
    for i, title in enumerate(w.index(start='a', throttle=1.0, cached=True)):
        if i >= max_titles:
            break
        article = w.search(title)
        if article is None:
            # NOTE(review): search() is assumed to return None when no
            # article is found -- confirm against the pattern.web docs.
            continue
        # Split the title text itself (not its repr()), so no quote or
        # prefix characters have to be stripped and no token can end up
        # empty. isalpha() is tested first; it is False for empty strings.
        wordlist.extend(
            word for word in article.title.split()
            if word.isalpha() and not word[0].isupper()
        )
        if len(wordlist) >= min_words:
            break

    for word in wordlist:
        print(word)

    return wordlist

示例#2

0

显示文件

文件： 09-wikia.py 项目： DevKhokhar/pattern

import sys

from pattern.web import Wikia

# This example retrieves articles from Wikia (http://www.wikia.com).
# Wikia is a collection of thousands of wikis based on MediaWiki.
# Wikipedia is based on MediaWiki too.
# Wikia queries request the article HTML source from the server. This can be slow.

domain = "monkeyisland" # "Look behind you, a three-headed monkey!"

# Alternatively, you can call this script from the commandline
# and specify another domain: python 09-wikia.py "Bieberpedia".
if len(sys.argv) > 1:
    domain = sys.argv[1]

w = Wikia(domain, language="en")

# Like Wikipedia, we can search for articles by title with Wikia.search():
print(w.search("Three Headed Monkey"))

# However, we may not know exactly what kind of articles exist,
# "three-headed monkey" for example does not redirect to the above article.

# We can iterate through all articles with the Wikia.articles() method
# (note that Wikipedia also has a Wikipedia.articles() method).
# The "count" parameter sets the number of article titles to retrieve per query.
# Retrieving the full article for each article takes another query. This can be slow.
i = 0
for article in w.articles(count=2, cached=True):
    print("")
    print(article.title)
    # Stop after three articles: every further article costs another query.
    i += 1
    if i >= 3:
        break

示例#3

0

显示文件

import sys

from pattern.web import Wikia

# This example retrieves articles from Wikia (http://www.wikia.com).
# Wikia is a collection of thousands of wikis based on MediaWiki.
# Wikipedia is based on MediaWiki too.
# Wikia queries request the article HTML source from the server. This can be slow.

domain = "monkeyisland"  # "Look behind you, a three-headed monkey!"

# Alternatively, you can call this script from the commandline
# and specify another domain: python 09-wikia.py "Bieberpedia".
if len(sys.argv) > 1:
    domain = sys.argv[1]

w = Wikia(domain, language="en")

# Like Wikipedia, we can search for articles by title with Wikia.search():
print(w.search("Three Headed Monkey"))

# However, we may not know exactly what kind of articles exist,
# "three-headed monkey" for example does not redirect to the above article.

# We can iterate through all articles with the Wikia.articles() method
# (note that Wikipedia also has a Wikipedia.articles() method).
# The "count" parameter sets the number of article titles to retrieve per query.
# Retrieving the full article for each article takes another query. This can be slow.
i = 0
for article in w.articles(count=2, cached=True):
    print("")
    print(article.title)
    # Stop after three articles: every further article costs another query.
    i += 1
    if i >= 3:
        break

示例#4

0

显示文件

文件： 13-wikia.py 项目： BarcelonaMedia-ViL/pattern

# -*- coding: utf-8 -*-
import os
import pprint
import sys

# Put the directory two levels up first on sys.path, so that the `pattern`
# import below is resolved there first (presumably the repository checkout).
sys.path.insert(0, os.path.join("..", ".."))

from pattern.web import Wikia

# This example retrieves articles from Wikia (http://www.wikia.com),
# a collection of thousands of wikis based on MediaWiki (i.e., what Wikipedia uses too).
# A query requests the article's HTML source from the server, which can be quite slow.

domain = "runescape" # A popular wiki...
# A different domain can be given as the first command-line argument.
if len(sys.argv) > 1:
    domain = sys.argv[1]

w = Wikia(domain, language="en")

# Just like Wikipedia, we can search for article titles.
# However, we may not know what articles exist.
# We can iterate through all articles with the Wikia.articles() method.
# Note that Wikipedia also has a Wikipedia.articles() method.

# The "count" parameter sets the number of article titles
# to retrieve per query to Wikia. Retrieving the full article
# for each article takes another query, so the process can be quite slow.
i = 0
for article in w.articles(count=2, cached=True):
    # print("") emits the same blank line as a bare Python 2 `print`
    # statement and also works on Python 3.
    print("")
    print(article.title)
    # print(article.plaintext())
    # Stop after three articles to keep the number of queries small.
    i += 1
    if i >= 3:
        break

示例#5

0

显示文件

文件： test1.py 项目： simgee/wikia

##MODULE = 'C:\Documents and Settings\dfg\Desktop\dev\wikia\pattern-2.6\pattern'
##import sys
##if MODULE not in sys.path: sys.path.append(MODULE)
from pattern.web import Wikia

w = Wikia(domain='lostpedia')
# NOTE(review): `a` is not used further down in this script; the lookup is
# kept as-is because it still issues the request -- confirm it is needed.
a = w.article(query="Richard_Alpert")

# Walk the title index (start='a'), look up each title and print the repr()
# of the article title; stop once eight titles have been handled.
for i, title in enumerate(w.index(start='a', throttle=1.0, cached=True)):
    if i >= 8:
        break
    article = w.search(title)
    # print(...) with a single argument behaves the same on Python 2 and 3.
    print(repr(article.title))

示例#6

0

显示文件

文件： 13-wikia.py 项目： relwell/pattern

# -*- coding: utf-8 -*-
import os
import pprint
import sys

# Put the directory two levels up first on sys.path, so that the `pattern`
# import below is resolved there first (presumably the repository checkout).
sys.path.insert(0, os.path.join("..", ".."))

from pattern.web import Wikia, WikiaArticleSet, URLTimeout

# This example iterates over the articles of a Wikia wiki with WikiaArticleSet
# and prints a running number together with each article title.
# Requests to the server can be quite slow, so a URLTimeout is caught below.

domain = 'runescape'  # popular wiki
# A different domain can be given as the first command-line argument.
if len(sys.argv) > 1:
    domain = sys.argv[1]

engine = Wikia(language="en", domain=domain)

# NOTE(review): iterationLimit presumably caps how many iterations the set
# performs -- confirm against the WikiaArticleSet implementation.
ArticleSet = WikiaArticleSet(engine, iterationLimit=200)

counter = 0
try:
    for page in ArticleSet:
        # %-formatting yields the same "number title" line as the Python 2
        # statement `print counter, page.title`, and also works on Python 3.
        print("%s %s" % (counter, page.title))
        counter += 1
except URLTimeout:
    print("Timeout error.")

示例#7

0

显示文件

# -*- coding: utf-8 -*-
import os
import pprint
import sys

# Put the directory two levels up first on sys.path, so that the `pattern`
# import below is resolved there first (presumably the repository checkout).
sys.path.insert(0, os.path.join("..", ".."))

from pattern.web import Wikia

# This example retrieves articles from Wikia (http://www.wikia.com),
# a collection of thousands of wikis based on MediaWiki (i.e., what Wikipedia uses too).
# A query requests the article's HTML source from the server, which can be quite slow.

domain = "runescape"  # A popular wiki...
# A different domain can be given as the first command-line argument.
if len(sys.argv) > 1:
    domain = sys.argv[1]

w = Wikia(domain, language="en")

# Just like Wikipedia, we can search for article titles.
# However, we may not know what articles exist.
# We can iterate through all articles with the Wikia.articles() method.
# Note that Wikipedia also has a Wikipedia.articles() method.

# The "count" parameter sets the number of article titles
# to retrieve per query to Wikia. Retrieving the full article
# for each article takes another query, so the process can be quite slow.
i = 0
for article in w.articles(count=2, cached=True):
    # print("") emits the same blank line as a bare Python 2 `print`
    # statement and also works on Python 3.
    print("")
    print(article.title)
    # print(article.plaintext())
    # Stop after three articles to keep the number of queries small.
    i += 1
    if i >= 3:
        break

示例#8

0

显示文件

文件： test3.py 项目： simgee/wikia

##MODULE = 'C:\Documents and Settings\dfg\Desktop\dev\wikia\pattern-2.6\pattern'
##import sys
##if MODULE not in sys.path: sys.path.append(MODULE)
from pattern.web import Wikia

# Look up a single article on the 'lostpedia' wiki and print the repr() of
# its title and of its categories.
w = Wikia(domain='lostpedia')
a = w.article(query="ABC Medianet")
# print(...) with a single argument behaves the same on Python 2 and 3.
print(repr(a.title))
print(repr(a.categories))