Example #1
def skipped_info(batch, skipped, view="last-new"):
    """
    Provide information/diffs for a list of skipped files.

    :param batch: the batch name; all files carry a category of the
        format "Category:Media contributed by LSH: <batch>".
    :param skipped: list of skipped Commons filenames (incl. namespace).
    :param view: which diff view to show; one of last-new, first-last
        or first-new, with last-new as default.
    """
    allowed_views = ('last-new', 'first-last', 'first-new')
    if view not in allowed_views:
        pywikibot.output("view must be one of the allowed views: {}".format(
            ', '.join(allowed_views)))
        return
    data = load_and_dump_LSH_info(batch)
    site = pywikibot.Site('commons', 'commons')
    for orig_name, image_data in data.items():
        title = "File:{:s}".format(image_data['filename'])
        if title not in skipped:
            continue
        page = pywikibot.Page(site, title)
        last_text = page.get()
        new_text = make_info_page(image_data)
        first_text = page.getOldVersion(page.oldest_revision.revid)
        if view == 'last-new':
            print_diff(last_text, new_text)
        elif view == 'first-last':
            print_diff(first_text, last_text)
        elif view == 'first-new':
            print_diff(first_text, new_text)
        raw_input(u"Press enter for next.")
Example #2
def process_info_blob(info, site, log, summary, dry, cutoff=None):
    """
    Process each image in an info blob.

    :param info: output of make_info
    :param site: the pywikibot.Site corresponding to the image repository
    :param log: the log function to use
    :param summary: the edit summary to use
    :param dry: if this is a dry run
    :param cutoff: Number of images to process before terminating. If None,
        assume all.
    """
    # @TODO: Check that File: is not already included in filename
    pywikibot.output('Processing {} images.'.format(len(info)))
    counter = 0
    for orig_name, image_data in info.items():
        if cutoff and cutoff <= counter:
            pywikibot.output('Reached cutoff.')
            return
        title = "File:{:s}".format(image_data['filename'])
        page = pywikibot.Page(site, title)
        new_text = make_info_page(image_data)
        updated_page = UpdatedPage(page, new_text, dry_run=dry,
                                   summary=summary)
        log_entry = updated_page.handle_single_page()
        if updated_page.update_page():
            log_entry = "Updated | {}".format(log_entry)
        else:
            log_entry = "Skipped | {}".format(log_entry)
        log("{title:s} | {log:s}\n".format(
            title=page.title(),
            log=log_entry))
        counter += 1
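A usage sketch under stated assumptions: the json path and log filename are invented, and a plain file write stands in for whatever log callable the caller normally supplies.

import json

import pywikibot

# Hypothetical driver: dry-run the first five images, appending the
# per-file log lines to processing.log.
with open('make_info_output.json', encoding='utf-8') as f:
    info = json.load(f)
site = pywikibot.Site('commons', 'commons')
with open('processing.log', 'a', encoding='utf-8') as logf:
    process_info_blob(info, site, logf.write,
                      summary='Updating file information',
                      dry=True, cutoff=5)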
Example #3
def makeAndRename(hitlist, outPath):
    """
    Given a hitlist create the info files and rename the matched file.

    @param hitlist: the output of makeHitlist
    @param outPath: the directory in which to store info + renamed files
    """
    # create outPath if it doesn't exist
    common.create_dir(outPath)

    # logfile
    logfile = os.path.join(outPath, '¤generator.log')
    with open(logfile, 'a', encoding='utf-8') as flog:
        for hit in hitlist:
            base_name = os.path.join(outPath, hit['data']['filename'])

            # output info file
            common.open_and_write_file('%s.info' % base_name,
                                       make_info_page(hit['data']))

            # rename/move matched file
            outfile = '%s%s' % (base_name, hit['ext'])
            os.rename(hit['path'], outfile)
            flog.write('%s|%s\n' %
                       (os.path.basename(hit['path']),
                        os.path.basename(outfile)))
    pywikibot.output('Created %s' % logfile)
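The docstring only promises that the hitlist is "the output of makeHitlist", so the entry below is inferred from the fields the loop reads (path, ext and data.filename); treat it as an assumption rather than a confirmed format.

# Hypothetical hitlist entry, shaped after the fields accessed above.
hitlist = [{
    'path': 'incoming/IMG_0001.tif',           # current location of the file
    'ext': '.tif',                             # extension kept when renaming
    'data': {'filename': 'Object_123_front'}   # also fed to make_info_page
}]
makeAndRename(hitlist, 'processed_files')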
Example #4
def test_make_info_page_no_meta_cats(self):
    self.data['meta_cats'] = []
    expected = ('{{Infobox\n| param1 = value1 \n}}\n\n'
                '<!-- Content categories -->\n'
                '[[Category:cat1]]\n'
                '[[Category:cat2]]')
    self.assertEqual(make_info_page(self.data), expected)
Example #5
def test_make_info_page(self):
    expected = ('{{Infobox\n| param1 = value1 \n}}\n\n'
                '<!-- Metadata categories -->\n'
                '[[Category:A meta_Cat]]'
                '\n\n'
                '<!-- Content categories -->\n'
                '[[Category:cat1]]\n'
                '[[Category:cat2]]')
    self.assertEqual(make_info_page(self.data), expected)
Example #6
def test_make_info_page_preview(self):
    expected = ("Filename: The_filename.<ext>\n"
                "{{Infobox\n| param1 = value1 \n}}\n\n"
                "''Metadata categories:''\n"
                "* [[:Category:A meta_Cat]]"
                "\n\n"
                "''Content categories:''\n"
                "* [[:Category:cat1]]\n"
                "* [[:Category:cat2]]")
    self.assertEqual(make_info_page(self.data, preview=True), expected)
Example #7
def run(data, selection, log_file, output, media_ext):
    """
    Compile preview pages for all ids in a selection into one document.

    :param data: path to the make_info json file
    :param selection: path to the selection json file; defaults to
        DEFAULTS['selection'] next to the data file
    :param log_file: optional log of skipped ids, with one
        "<idno> -- <reason>" pair per line
    :param output: path to the output file; defaults to
        DEFAULTS['output'] next to the selection file
    :param media_ext: file extension with which to replace any "<ext>"
        placeholder in the previews
    """
    # fall back on defaults
    data_dir = os.path.split(data)[0]
    if not selection:
        selection = os.path.join(data_dir, DEFAULTS.get('selection'))
    selection_dir = os.path.split(selection)[0]
    if not output:
        output = os.path.join(selection_dir, DEFAULTS.get('output'))

    data = common.open_and_read_file(data, as_json=True)
    demo = common.open_and_read_file(selection, as_json=True)

    # load log
    log = {}
    if log_file:
        log_text = common.open_and_read_file(log_file)
        for line in log_text.split('\n'):
            if ' -- ' in line:
                idno, reason = line.split(' -- ', 1)
                log[idno] = reason

    out = []
    for idno in sorted(demo.keys()):
        info = ''
        if idno in data:
            info = mi.make_info_page(data[idno], preview=True)
            if media_ext:
                info = info.replace('<ext>', media_ext)
        elif idno in log:
            info = log[idno]
        else:
            info = 'no make_info data found'
        out.append('== {idno} -- {reason} ==\n{info}'.format(
            reason=demo.get(idno), idno=idno, info=info))

    common.open_and_write_file(output, '\n\n'.join(out))
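The log format run expects is implied rather than documented: one "<idno> -- <reason>" pair per line. A sketch of the call with invented paths; the comment shows what the skip log would contain.

# Hypothetical contents of data/skipped.log, passed as log_file:
#   id0042 -- no image file found
#   id0107 -- duplicate of id0001
run(data='data/make_info_output.json',
    selection='data/selection.json',
    log_file='data/skipped.log',
    output=None,          # falls back on DEFAULTS['output']
    media_ext='.jpg')     # replaces the '<ext>' placeholder in previews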
Example #8
def up_all_from_url(info_path,
                    cutoff=None,
                    target='upload_logs',
                    file_exts=None,
                    verbose=False,
                    test=False,
                    target_site=None,
                    only=None,
                    skip=None):
    """
    Upload all images provided as urls in a make_info json file.

    The make_info json file must, for each source url, provide the desired
    output filename and the entirety of the description page (in wikitext)
    through its info field.

    Outputs separate logfiles for files triggering errors, warnings and
    successes, so that these can be reused in later runs.

    @param info_path: path to the make_info json file
    @param cutoff: number of files to upload (defaults to all)
    @param target: sub-directory for log files (defaults to "upload_logs")
    @param file_exts: tuple of allowed file extensions (defaults to FILE_EXTS)
    @param verbose: whether to output confirmation after each upload
    @param test: set to True to test but not upload
    @param target_site: pywikibot.Site to which file should be uploaded,
        defaults to Commons.
    @param only: list of urls to upload, if provided all others will be skipped
    @param skip: list of urls to skip, all others will be uploaded
    """
    # set defaults unless overridden
    file_exts = file_exts or FILE_EXTS
    target_site = target_site or pywikibot.Site('commons', 'commons')
    target_site.login()

    # load info file
    info_datas = common.open_and_read_file(info_path, as_json=True)

    # create target directory if it doesn't exist
    output_dir = os.path.join(os.path.dirname(info_path), target)
    common.create_dir(output_dir)

    # create all log files
    logs = {
        'success': common.LogFile(output_dir, 'success.log'),
        'warning': common.LogFile(output_dir, 'warnings.log'),
        'error': common.LogFile(output_dir, 'errors.log'),
        'general': common.LogFile(output_dir, 'uploader.log')
    }

    # shortcut to the general/verbose logfile
    flog = logs['general']

    # filtering based on entries in only/skip
    kill_list = set()
    if only:
        kill_list |= set(info_datas.keys()) - set(only)  # difference
    if skip:
        kill_list |= set(info_datas.keys()) & set(skip)  # intersection
    for key in kill_list:
        del info_datas[key]
    flog.write_w_timestamp('{} files remain to upload after filtering'.format(
        len(info_datas)))

    counter = 1
    for url, data in info_datas.items():
        if cutoff and counter > cutoff:
            break

        # verify that the file extension is ok
        try:
            ext = verify_url_file_extension(url, file_exts)
        except common.MyError as e:
            flog.write_w_timestamp(e)
            continue

        # verify that info and output filenames are provided
        if not data['info']:
            flog.write_w_timestamp(
                '{url}: Found url missing the info field (at least)'.format(
                    url=url))
            continue
        elif not data['filename']:
            flog.write_w_timestamp(
                '{url}: Found url missing the output filename'.format(url=url))
            continue

        # prepare upload
        txt = make_info_page(data)
        filename = '{filename}{ext}'.format(filename=data['filename'], ext=ext)

        # stop here if testing: report what would have been uploaded
        if test:
            pywikibot.output(
                'Test upload "{filename}" from "{url}" with the following '
                'description:\n{txt}\n'.format(filename=filename,
                                               url=url,
                                               txt=txt))
            counter += 1
            continue

        result = upload_single_file(filename,
                                    url,
                                    txt,
                                    target_site,
                                    upload_if_badprefix=True)
        if result.get('error'):
            logs['error'].write(url)
        elif result.get('warning'):
            logs['warning'].write(url)
        else:
            logs['success'].write(url)
        if verbose:
            pywikibot.output(result.get('log'))

        flog.write_w_timestamp(result.get('log'))
        counter += 1

    for log in logs.values():
        pywikibot.output(log.close_and_confirm())
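A usage sketch assuming the module-level FILE_EXTS default mentioned in the docstring; the paths and url are invented.

# Hypothetical batch: dry-run the first three uploads, skipping one
# known-bad url; logs end up under batches/2016-10/upload_logs/.
up_all_from_url('batches/2016-10/make_info_output.json',
                cutoff=3,
                test=True,     # print what would be uploaded, upload nothing
                verbose=True,
                skip=['http://example.com/broken_image.tif'])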
Example #9
def test_make_info_page_no_cats(self):
    self.data['meta_cats'] = []
    self.data['cats'] = []
    expected = '{{Infobox\n| param1 = value1 \n}}'
    self.assertEqual(make_info_page(self.data), expected)
Example #10
def test_make_info_page_no_content_cats(self):
    self.data['cats'] = []
    expected = ('{{Infobox\n| param1 = value1 \n}}\n\n'
                '<!-- Metadata categories -->\n'
                '[[Category:A meta_Cat]]')
    self.assertEqual(make_info_page(self.data), expected)
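Taken together, the five tests pin down make_info_page's contract: the info template first, then an optional metadata block and an optional content category block, with preview mode adding a filename line and swapping in italic headings and linked categories. The sketch below is a reconstruction from those tests, assuming the template text sits in data['info'] as in Example #8; the real implementation may differ.

def make_info_page(data, preview=False):
    """Minimal sketch reconstructed from the tests above."""
    txt = ''
    if preview:
        txt += 'Filename: {}.<ext>\n'.format(data['filename'])
    txt += data['info']
    for label, cats in (('Metadata', data['meta_cats']),
                        ('Content', data['cats'])):
        if not cats:
            continue
        txt += '\n\n'
        if preview:
            txt += "''{} categories:''\n".format(label)
            txt += '\n'.join('* [[:Category:{}]]'.format(c) for c in cats)
        else:
            txt += '<!-- {} categories -->\n'.format(label)
            txt += '\n'.join('[[Category:{}]]'.format(c) for c in cats)
    return txt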