Пример #1
0
def main():
    """Run the transcoded-page upload bot on a hard-coded Index page.

    An IndexPage is created on the 'en' wikisource site, one page
    generator is requested per page number in a fixed range, and the
    chained generators are handed to ``ubot.UploadTranscodedPageBot``.
    """
    wikisource = pywikibot.Site('en', 'wikisource')

    # Index titles tried earlier by the author:
    #   "מפתח:ספר_השרשים.pdf"
    #   "Index:ספר_השרשים.pdf"
    index_title = "Index:Catholic_Encyclopedia,_volume_17.djvu"
    index = IndexPage(wikisource, index_title)

    # NOTE: an earlier, disabled draft took the page numbers from an
    # sqlite3 query against 'example.db' instead of the fixed range below.
    #
    # TODO: clarify what the ``content`` parameter means and what should be
    #       passed as ``filter_ql``.
    # TODO: the author reported an exception mentioning
    #       'qualityN prp-pagequality-N" or class="new"' at this call and
    #       worked around it with a local patch (proffread.patch).
    page_gens = [
        index.page_gen(start=number, end=number, filter_ql=None,
                       content=False)
        for number in range(6, 7)
    ]
    all_pages = itertools.chain.from_iterable(page_gens)

    banner = '\nUploading text to %s\n' % index.title(asLink=True)
    pywikibot.output(banner)

    ubot.UploadTranscodedPageBot(all_pages, site=index.site).run()
 def test_valid_link_as_source(self):
     """Check that an IndexPage built from a valid Link mirrors that Link."""
     link = pywikibot.Link(
         self.valid_index_title,
         source=self.site,
         defaultNamespace=self.site.proofread_page_ns)
     index_page = IndexPage(link)
     # The page's title (without namespace) and its namespace must both
     # agree with the Link it was constructed from.
     self.assertEqual(index_page.title(withNamespace=False), link.title)
     self.assertEqual(index_page.namespace(), link.namespace)
Пример #3
0
 def test_valid_link_as_source(self):
     """Check that an IndexPage built from a valid Link mirrors that Link."""
     link = pywikibot.Link(
         self.valid_index_title,
         source=self.site,
         default_namespace=self.site.proofread_page_ns)
     index_page = IndexPage(link)
     # The page's title (without namespace) and its namespace must both
     # agree with the Link it was constructed from.
     self.assertEqual(index_page.title(with_ns=False), link.title)
     self.assertEqual(index_page.namespace(), link.namespace)
Пример #4
0
def main(*args):
    """
    Process command line arguments and invoke bot.

    If args is an empty list, sys.argv is used.

    Local arguments are given as ``-name:value``:

    - ``-index``: title of the Index page (mandatory).
    - ``-pages``: comma-separated list of intervals, default ``1-``.
      Each interval is ``N`` (single page), ``N-M``, ``N-`` (up to
      ``index.num_pages``) or ``-M`` (starting at page 1).
    - ``-summary``: stored in the bot options as ``summary``.
    - ``-always``: stored in the bot options as ``always=True``.
    - ``-showdiff`` and ``-force``: accepted, but only call
      ``issue_deprecation_warning``.

    @param args: command line arguments
    @type args: list of unicode
    @return: False when the arguments, the site or the Index page are not
        usable; None after the bot has run
    """
    index = None
    pages = '1-'
    options = {}

    # Parse command line arguments.
    local_args = pywikibot.handle_args(args)
    for arg in local_args:
        arg, sep, value = arg.partition(':')
        if arg == '-index':
            index = value
        elif arg == '-pages':
            pages = value
        elif arg == '-showdiff':
            issue_deprecation_warning('The usage of -showdiff option', None, 0)
        elif arg == '-summary':
            options['summary'] = value
        elif arg == '-force':
            issue_deprecation_warning('The usage of -force option', None, 0)
        elif arg == '-always':
            options['always'] = True
        else:
            pywikibot.output('Unknown argument %s' % arg)

    # index is mandatory.
    if not index:
        pywikibot.bot.suggest_help(missing_parameters=['-index'])
        return False

    site = pywikibot.Site()
    if not site.has_extension('ProofreadPage'):
        pywikibot.error('Site %s must have ProofreadPage extension.' % site)
        return False

    index = IndexPage(site, index)

    if not index.exists():
        pywikibot.error("Page %s doesn't exist." % index)
        return False

    # Parse pages param into a list of (start, end) tuples.
    intervals = []
    for chunk in pages.split(','):
        first, sep, last = chunk.partition('-')
        try:
            start = int(first) if first else 1
            explicit_end = int(last) if last else None
        except ValueError:
            # Report malformed input like the other validation failures
            # above instead of letting the traceback escape.
            pywikibot.error(
                "Invalid -pages interval '%s': expected N, N-M, N- or -M."
                % chunk)
            return False

        if not sep:
            # A bare number selects exactly one page.
            end = start
        elif explicit_end is None:
            # Open-ended interval: run to the last page of the index.
            end = index.num_pages
        else:
            end = explicit_end
        intervals.append((start, end))

    # One generator per interval, processed in ascending order.
    # NOTE(review): filter_ql=[1] presumably selects a single quality
    # level - confirm against the page_gen documentation.
    gen_list = [
        index.page_gen(start=start, end=end, filter_ql=[1], content=False)
        for start, end in sorted(intervals)
    ]
    gen = itertools.chain.from_iterable(gen_list)

    pywikibot.output('\nUploading text to %s\n' % index.title(asLink=True))

    bot = UploadTextBot(gen, site=index.site, **options)
    bot.run()
def main(*args):
    """
    Process command line arguments and invoke bot.

    If args is an empty list, sys.argv is used.

    Local arguments are given as ``-name:value``:

    - ``-index``: title of the Index page (mandatory).
    - ``-pages``: comma-separated list of intervals, default ``1-``.
      Each interval is ``N`` (single page), ``N-M``, ``N-`` (up to
      ``index.num_pages``) or ``-M`` (starting at page 1).
    - ``-showdiff``, ``-force``, ``-always``: stored as True in the bot
      options; ``-force`` is rejected unless ``-ocr`` is also given.
    - ``-summary``: stored in the bot options as ``summary``.
    - ``-ocr``: stored as ``ocr``; defaults to ``'phetools'`` when no
      value is supplied.
    - ``-threads``: integer stored as ``threads``.

    On any validation failure an error is reported and the function
    returns without running the bot.

    @param args: command line arguments
    @type args: str
    """
    index = None
    pages = '1-'
    options = {}

    # Parse command line arguments.
    local_args = pywikibot.handle_args(args)
    for arg in local_args:
        arg, sep, value = arg.partition(':')
        if arg == '-index':
            index = value
        elif arg == '-pages':
            pages = value
        elif arg == '-showdiff':
            options['showdiff'] = True
        elif arg == '-summary':
            options['summary'] = value
        elif arg == '-ocr':
            options['ocr'] = value or 'phetools'
        elif arg == '-threads':
            try:
                options['threads'] = int(value)
            except ValueError:
                # Missing or non-numeric value: report it instead of
                # crashing with a traceback.
                pywikibot.error(
                    "'-threads' requires an integer value, got '{}'."
                    .format(value))
                return
        elif arg == '-force':
            options['force'] = True
        elif arg == '-always':
            options['always'] = True
        else:
            pywikibot.output('Unknown argument ' + arg)

    # index is mandatory.
    if not index:
        pywikibot.bot.suggest_help(missing_parameters=['-index'])
        return

    # '-force' can be used with '-ocr' only.
    if 'force' in options and 'ocr' not in options:
        pywikibot.error("'-force' can be used with '-ocr' option only.")
        return

    site = pywikibot.Site()
    if not site.has_extension('ProofreadPage'):
        pywikibot.error(
            'Site {} must have ProofreadPage extension.'.format(site))
        return

    index = IndexPage(site, index)

    if not index.exists():
        pywikibot.error("Page {} doesn't exist.".format(index))
        return

    # Parse pages param into a list of (start, end) tuples.
    intervals = []
    for chunk in pages.split(','):
        first, sep, last = chunk.partition('-')
        try:
            start = int(first) if first else 1
            explicit_end = int(last) if last else None
        except ValueError:
            # Report malformed input like the other validation failures
            # above instead of letting the traceback escape.
            pywikibot.error(
                "Invalid -pages interval '{}': expected N, N-M, N- or -M."
                .format(chunk))
            return

        if not sep:
            # A bare number selects exactly one page.
            end = start
        elif explicit_end is None:
            # Open-ended interval: run to the last page of the index.
            end = index.num_pages
        else:
            end = explicit_end
        intervals.append((start, end))

    # gen yields ProofreadPage objects, one generator per interval,
    # processed in ascending order.
    # NOTE(review): filter_ql=[1] presumably selects a single quality
    # level - confirm against the page_gen documentation.
    gen_list = [
        index.page_gen(start=start, end=end, filter_ql=[1], content=True)
        for start, end in sorted(intervals)
    ]
    gen = itertools.chain.from_iterable(gen_list)

    pywikibot.output('\nUploading text to {}\n'.format(
        index.title(as_link=True)))

    bot = UploadTextBot(gen, site=index.site, **options)
    bot.run()
Пример #6
0
def main(*args):
    """
    Process command line arguments and invoke bot.

    If args is an empty list, sys.argv is used.

    Local arguments are given as ``-name:value``:

    - ``-index``: title of the Index page (mandatory).
    - ``-pages``: comma-separated list of intervals, default ``1-``.
      Each interval is ``N`` (single page), ``N-M``, ``N-`` (up to
      ``index.num_pages``) or ``-M`` (starting at page 1).
    - ``-showdiff``, ``-ocr``, ``-force``, ``-always``: stored as True in
      the bot options; ``-force`` is rejected unless ``-ocr`` is also
      given.
    - ``-summary``: stored in the bot options as ``summary``.

    @param args: command line arguments
    @type args: list of unicode
    @return: False when the arguments, the site or the Index page are not
        usable; None after the bot has run
    """
    index = None
    pages = '1-'
    options = {}

    # Parse command line arguments.
    local_args = pywikibot.handle_args(args)
    for arg in local_args:
        arg, sep, value = arg.partition(':')
        if arg == '-index':
            index = value
        elif arg == '-pages':
            pages = value
        elif arg == '-showdiff':
            options['showdiff'] = True
        elif arg == '-summary':
            options['summary'] = value
        elif arg == '-ocr':
            options['ocr'] = True
        elif arg == '-force':
            options['force'] = True
        elif arg == '-always':
            options['always'] = True
        else:
            pywikibot.output('Unknown argument %s' % arg)

    # index is mandatory.
    if not index:
        pywikibot.bot.suggest_help(missing_parameters=['-index'])
        return False

    # '-force' can be used with '-ocr' only.
    if 'force' in options and 'ocr' not in options:
        pywikibot.error("'-force' can be used with '-ocr' option only.")
        return False

    site = pywikibot.Site()
    if not site.has_extension('ProofreadPage'):
        pywikibot.error('Site %s must have ProofreadPage extension.' % site)
        return False

    index = IndexPage(site, index)

    if not index.exists():
        pywikibot.error("Page %s doesn't exist." % index)
        return False

    # Parse pages param into a list of (start, end) tuples.
    intervals = []
    for chunk in pages.split(','):
        first, sep, last = chunk.partition('-')
        try:
            start = int(first) if first else 1
            explicit_end = int(last) if last else None
        except ValueError:
            # Report malformed input like the other validation failures
            # above instead of letting the traceback escape.
            pywikibot.error(
                "Invalid -pages interval '%s': expected N, N-M, N- or -M."
                % chunk)
            return False

        if not sep:
            # A bare number selects exactly one page.
            end = start
        elif explicit_end is None:
            # Open-ended interval: run to the last page of the index.
            end = index.num_pages
        else:
            end = explicit_end
        intervals.append((start, end))

    # gen yields ProofreadPage objects, one generator per interval,
    # processed in ascending order.
    # NOTE(review): filter_ql=[1] presumably selects a single quality
    # level - confirm against the page_gen documentation.
    gen_list = [
        index.page_gen(start=start, end=end, filter_ql=[1], content=False)
        for start, end in sorted(intervals)
    ]
    gen = itertools.chain.from_iterable(gen_list)

    pywikibot.output('\nUploading text to %s\n' % index.title(asLink=True))

    bot = UploadTextBot(gen, site=index.site, **options)
    bot.run()