示例#1
0
def crawlerRC():
	gen = pagegenerators.RecentchangesPageGenerator()
	for Page in pagegenerators.PreloadingGenerator(gen,100):
		modification(Page.title())
示例#2
0
def crawlerRC():
    RC = pagegenerators.RecentchangesPageGenerator()
    for Page in pagegenerators.PreloadingGenerator(RC, 100):
        if Page.namespace() == 1: modification(Page.title())
示例#3
0
def main():
    wikipedia.config.put_throttle = 0
    wikipedia.put_throttle.setDelay()
    summary_commandline, gen, template = None, None, None
    namespaces, PageTitles, exceptions = [], [], []
    encat, newcatfile = '', ''
    autoText, autoTitle = False, False
    recentcat, newcat = False, False
    genFactory = pagegenerators.GeneratorFactory()
    for arg in wikipedia.handleArgs():
        if arg == '-autotitle':
            autoTitle = True
        elif arg == '-autotext':
            autoText = True
        elif arg.startswith('-page'):
            if len(arg) == 5:
                PageTitles.append(
                    wikipedia.input(u'Which page do you want to chage?'))
            else:
                PageTitles.append(arg[6:])
            break
        elif arg.startswith('-except:'):
            exceptions.append(arg[8:])
        elif arg.startswith('-template:'):
            template = arg[10:]
        elif arg.startswith('-encat:'):
            encat = arg[7:].replace(u'Category:',
                                    u'').replace(u'category:',
                                                 u'').replace(u'رده:', u'')
            break
        elif arg.startswith('-newcatfile:'):
            newcatfile = arg[12:]
            break
        elif arg.startswith('-recentcat'):
            arg = arg.replace(':', '')
            if len(arg) == 10:
                genfa = pagegenerators.RecentchangesPageGenerator()
            else:
                genfa = pagegenerators.RecentchangesPageGenerator(
                    number=int(arg[10:]))
            genfa = pagegenerators.DuplicateFilterPageGenerator(genfa)
            genfa = pagegenerators.NamespaceFilterPageGenerator(genfa, [14])
            preloadingGen = pagegenerators.PreloadingGenerator(genfa, 60)
            recentcat = True
            break
        elif arg.startswith('-newcat'):
            arg = arg.replace(':', '')
            if len(arg) == 7:
                genfa = pagegenerators.NewpagesPageGenerator(
                    100, False, None, 14)
            else:
                genfa = pagegenerators.NewpagesPageGenerator(
                    int(arg[7:]), False, None, 14)
            preloadingGen = pagegenerators.PreloadingGenerator(genfa, 60)
            newcat = True
            break
        elif arg.startswith('-namespace:'):
            namespaces.append(int(arg[11:]))
        elif arg.startswith('-summary:'):
            wikipedia.setAction(arg[9:])
            summary_commandline = True
        else:
            generator = genFactory.handleArg(arg)
            if generator:
                gen = generator

    if PageTitles:
        pages = [
            wikipedia.Page(wikipedia.getSite(), PageTitle)
            for PageTitle in PageTitles
        ]
        gen = iter(pages)
    if recentcat:
        for workpage in preloadingGen:
            workpage = workpage.title()
            cat = catlib.Category(wikipedia.getSite('fa'), workpage)
            gent = pagegenerators.CategorizedPageGenerator(cat)
            run(gent)
        wikipedia.stopme()
        sys.exit()
    if newcat:
        run(preloadingGen)
    if newcatfile:
        text2 = codecs.open(newcatfile, 'r', 'utf8')
        text = text2.read()
        linken = re.findall(ur'\[\[.*?\]\]', text, re.S)
        run(linken)
        wikipedia.stopme()
        sys.exit()
    if not gen:
        wikipedia.stopme()
        sys.exit()
    if namespaces != []:
        gen = pagegenerators.NamespaceFilterPageGenerator(gen, namespaces)
    preloadingGen = pagegenerators.PreloadingGenerator(gen, pageNumber=60)
    run(preloadingGen)