Example #1
''' A script used to randomly collect Wikipedia articles '''
import argparse

from wikipedia import Wikipedia

# Parse command line arguments

parser = argparse.ArgumentParser()

parser.add_argument("how_many_pages", type=int, help="crawling articles limit")
parser.add_argument("subdomain", type=str, help="crawling subdomain")

args = parser.parse_args()
# Start crawling

wiki = Wikipedia(args.subdomain)
wiki.crawl(args.how_many_pages)
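
Run from a shell, this first version takes the number of articles to collect followed by the target subdomain; for example, python crawler.py 100 en collects 100 random articles from en.wikipedia.org.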
Example #2
File: crawler.py Project: r0pchainz/wiki
''' A script used to randomly collect Wikipedia articles '''
import argparse

from wikipedia import Wikipedia

# Parse command line arguments

parser = argparse.ArgumentParser()

parser.add_argument(
	"time_limit",
	type=int,
	help="crawling time limit in seconds"
)
parser.add_argument(
	"subdomain",
	type=str,
	help="crawling subdomain"
)
parser.add_argument(
	"-s",
	"--summary",
	action="store_true",
	help="collect summaries instead of full articles"
)

args = parser.parse_args()

# Start crawling

wiki = Wikipedia(args.subdomain, args.summary)
wiki.crawl(args.time_limit)
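
Neither example includes the Wikipedia class itself, which lives elsewhere in the r0pchainz/wiki project. The following is a minimal sketch of what such a class might look like, inferred purely from the call sites above, Wikipedia(subdomain, summary) and crawl(time_limit); the constructor arguments, the use of the public MediaWiki API, and the requests dependency are all assumptions, not the project's actual implementation.

''' A hypothetical sketch of the Wikipedia class the scripts import '''
import time

import requests


class Wikipedia:

	def __init__(self, subdomain, summary=False):
		# subdomain selects the language edition, e.g. "en" -> en.wikipedia.org
		self.api_url = "https://{}.wikipedia.org/w/api.php".format(subdomain)
		self.summary = summary

	def crawl(self, time_limit):
		# Fetch random articles until time_limit seconds have elapsed
		# (Example #1's variant stops after a fixed page count instead)
		deadline = time.time() + time_limit
		while time.time() < deadline:
			params = {
				"action": "query",
				"format": "json",
				"generator": "random",
				"grnnamespace": 0,   # main namespace: articles only
				"prop": "extracts",
				"explaintext": 1,    # plain text instead of HTML
			}
			if self.summary:
				params["exintro"] = 1  # intro section only
			data = requests.get(self.api_url, params=params, timeout=10).json()
			for page in data["query"]["pages"].values():
				print(page["title"])

Under these assumptions, Wikipedia("en", summary=True) followed by crawl(60) would print titles of random English-language articles for roughly one minute.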