def skipped_info(batch, skipped, view="last-new"):
    """
    Provide information/diffs for a list of skipped files.

    :param batch: The category added to all files of the format
        "Category:Media contributed by LSH: <batch>".
    :param skipped: list of skipped Commons filenames (incl. namespace).
    :param view: the diff view which you want. Allows last-new, first-last,
        first-new with last-new as default.
    """
    allowed_views = ('last-new', 'first-last', 'first-new')
    if view not in allowed_views:
        pywikibot.output("view must be one of the allowed_views: {}".format(
            ', '.join(allowed_views)))
        return

    data = load_and_dump_LSH_info(batch)
    site = pywikibot.Site('commons', 'commons')

    for orig_name, image_data in data.items():
        title = "File:{:s}".format(image_data['filename'])
        if title not in skipped:
            continue
        page = pywikibot.Page(site, title)
        last_text = page.get()
        new_text = make_info_page(image_data)
        first_text = page.getOldVersion(page.oldest_revision.revid)

        if view == 'last-new':
            print_diff(last_text, new_text)
        elif view == 'first-last':
            print_diff(first_text, last_text)
        elif view == 'first-new':
            print_diff(first_text, new_text)

        raw_input(u"Press enter for next.")
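# Illustrative sketch (not part of the original module): a hypothetical call
# to skipped_info() reviewing two files skipped during a "2017-09" batch,
# comparing the first uploaded revision against the newly generated text.
# The batch name and file titles are made-up examples.
def _example_skipped_info():
    skipped = ['File:Example object 1.jpg', 'File:Example object 2.jpg']
    skipped_info('2017-09', skipped, view='first-new')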
def process_info_blob(info, site, log, summary, dry, cutoff=None):
    """
    Process each image in an info blob.

    :param info: output of make_info
    :param site: the pywikibot.Site corresponding to the image repository
    :param log: the log function to use
    :param summary: the edit summary to use
    :param dry: if this is a dry run
    :param cutoff: Number of images to process before terminating.
        If None, assume all.
    """
    # @TODO: Check that File: is not already included in filename
    pywikibot.output('Processing {} images.'.format(len(info)))
    counter = 0
    for orig_name, image_data in info.items():
        if cutoff and cutoff <= counter:
            pywikibot.output('Reached cutoff.')
            return
        title = "File:{:s}".format(image_data['filename'])
        page = pywikibot.Page(site, title)
        new_text = make_info_page(image_data)

        updated_page = UpdatedPage(page, new_text, dry_run=dry,
                                   summary=summary)
        log_entry = updated_page.handle_single_page()
        if updated_page.update_page():
            log_entry = "Updated | {}".format(log_entry)
        else:
            log_entry = "Skipped | {}".format(log_entry)
        log("{title:s} | {log:s}\n".format(
            title=page.title(), log=log_entry))
        counter += 1
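# Illustrative sketch (not from the original module) of driving
# process_info_blob(): `info` is the dict produced by make_info (keyed by
# original filename, with values carrying at least 'filename' plus whatever
# make_info_page() needs), here read from a hypothetical json file. The
# append-to-file log function and all paths are made-up examples.
def _example_process_info_blob(site):
    info = common.open_and_read_file('make_info_output.json', as_json=True)

    def log(entry):
        with open('update.log', 'a', encoding='utf-8') as f:
            f.write(entry)

    process_info_blob(info, site, log, summary='Updating file information',
                      dry=True)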
def makeAndRename(hitlist, outPath):
    """
    Given a hitlist create the info files and rename the matched file.

    @param hitlist: the output of makeHitlist
    @param outPath: the directory in which to store info + renamed files
    """
    # create outPath if it doesn't exist
    common.create_dir(outPath)

    # logfile
    logfile = os.path.join(outPath, '¤generator.log')
    flog = open(logfile, 'a', encoding='utf-8')

    for hit in hitlist:
        base_name = os.path.join(outPath, hit['data']['filename'])

        # output info file
        common.open_and_write_file('%s.info' % base_name,
                                   make_info_page(hit['data']))

        # rename/move matched file
        outfile = '%s%s' % (base_name, hit['ext'])
        os.rename(hit['path'], outfile)
        flog.write('%s|%s\n' % (os.path.basename(hit['path']),
                                os.path.basename(outfile)))
    flog.close()
    pywikibot.output('Created %s' % logfile)
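# Illustrative sketch (not from the original module) of the hitlist shape
# makeAndRename() consumes, as inferred from the loop above: each entry
# carries 'path' (the source file), 'ext' (apparently including the leading
# dot, given the plain concatenation) and 'data' (the make_info data,
# containing at least 'filename'). All values are made-up examples.
_example_hit = {
    'path': 'indata/scan_001.tif',
    'ext': '.tif',
    'data': {'filename': 'Example object 1'},
}
# makeAndRename([_example_hit], 'outdata')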
def test_make_info_page_no_meta_cats(self):
    self.data['meta_cats'] = []
    expected = ('{{Infobox\n| param1 = value1 \n}}\n\n'
                '<!-- Content categories -->\n'
                '[[Category:cat1]]\n'
                '[[Category:cat2]]')
    self.assertEqual(make_info_page(self.data), expected)
def test_make_info_page(self):
    expected = ('{{Infobox\n| param1 = value1 \n}}\n\n'
                '<!-- Metadata categories -->\n'
                '[[Category:A meta_Cat]]'
                '\n\n'
                '<!-- Content categories -->\n'
                '[[Category:cat1]]\n'
                '[[Category:cat2]]')
    self.assertEqual(make_info_page(self.data), expected)
def test_make_info_page_preview(self):
    expected = ("Filename: The_filename.<ext>\n"
                "{{Infobox\n| param1 = value1 \n}}\n\n"
                "''Metadata categories:''\n"
                "* [[:Category:A meta_Cat]]"
                "\n\n"
                "''Content categories:''\n"
                "* [[:Category:cat1]]\n"
                "* [[:Category:cat2]]")
    self.assertEqual(make_info_page(self.data, preview=True), expected)
def run(data, selection, log_file, output, media_ext):
    # fall back on defaults
    data_dir = os.path.split(data)[0]
    if not selection:
        selection = os.path.join(data_dir, DEFAULTS.get('selection'))
    selection_dir = os.path.split(selection)[0]
    if not output:
        output = os.path.join(selection_dir, DEFAULTS.get('output'))

    data = common.open_and_read_file(data, as_json=True)
    demo = common.open_and_read_file(selection, as_json=True)

    # load log
    log = {}
    if log_file:
        log_text = common.open_and_read_file(log_file)
        for l in log_text.split('\n'):
            if ' -- ' in l:
                idno, reason = l.split(' -- ')
                log[idno] = reason

    out = []
    for idno in sorted(demo.keys()):
        info = ''
        if idno in data:
            info = mi.make_info_page(data[idno], preview=True)
            if media_ext:
                info = info.replace('<ext>', media_ext)
        elif log:
            info = log[idno]
        else:
            info = 'no make_info data found'
        out.append('== {idno} -- {reason} ==\n{info}'.format(
            reason=demo.get(idno), idno=idno, info=info))

    common.open_and_write_file(output, '\n\n'.join(out))
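# Illustrative sketch (not from the original module): the optional log file
# read by run() pairs an idno with a reason on each line, separated by
# " -- ", e.g.
#
#   1234 -- skipped: no matching image file
#   5678 -- skipped: duplicate of 1234
#
# A hypothetical invocation, with made-up paths and a .tif media extension,
# could then look like:
def _example_run():
    run('make_info_output.json', 'selection.json', 'skipped.log',
        'previews.wiki', '.tif')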
def up_all_from_url(info_path, cutoff=None, target='upload_logs',
                    file_exts=None, verbose=False, test=False,
                    target_site=None, only=None, skip=None):
    """
    Upload all images provided as urls in a make_info json file.

    Media (image) files and metadata files with the expected extension
    .info should be in the same directory. Metadata files should contain
    the entirety of the desired description page (in wikitext).

    Outputs separate logfiles for files triggering errors, warnings (and
    successes) so that these can be used in later runs.

    @param info_path: path to the make_info json file
    @param cutoff: number of files to upload (defaults to all)
    @param target: sub-directory for log files (defaults to "upload_logs")
    @param file_exts: tuple of allowed file extensions (defaults to FILE_EXTS)
    @param verbose: whether to output confirmation after each upload
    @param test: set to True to test but not upload
    @param target_site: pywikibot.Site to which file should be uploaded,
        defaults to Commons.
    @param only: list of urls to upload, if provided all others will be
        skipped
    @param skip: list of urls to skip, all others will be uploaded
    """
    # set defaults unless overridden
    file_exts = file_exts or FILE_EXTS
    target_site = target_site or pywikibot.Site('commons', 'commons')
    target_site.login()

    # load info file
    info_datas = common.open_and_read_file(info_path, as_json=True)

    # create target directory if it doesn't exist
    output_dir = os.path.join(os.path.dirname(info_path), target)
    common.create_dir(output_dir)

    # create all log files
    logs = {
        'success': common.LogFile(output_dir, 'success.log'),
        'warning': common.LogFile(output_dir, 'warnings.log'),
        'error': common.LogFile(output_dir, 'errors.log'),
        'general': common.LogFile(output_dir, 'uploader.log')
    }
    # shortcut to the general/verbose logfile
    flog = logs['general']

    # filtering based on entries in only/skip
    kill_list = set()
    if only:
        kill_list |= set(info_datas.keys()) - set(only)  # difference
    if skip:
        kill_list |= set(info_datas.keys()) & set(skip)  # intersection
    for key in kill_list:
        del info_datas[key]
    flog.write_w_timestamp('{} files remain to upload after filtering'.format(
        len(info_datas)))

    counter = 1
    for url, data in info_datas.items():
        if cutoff and counter > cutoff:
            break

        # verify that the file extension is ok
        try:
            ext = verify_url_file_extension(url, file_exts)
        except common.MyError as e:
            flog.write_w_timestamp(e)
            continue

        # verify that info and output filenames are provided
        if not data['info']:
            flog.write_w_timestamp(
                '{url}: Found url missing the info field (at least)'.format(
                    url=url))
            continue
        elif not data['filename']:
            flog.write_w_timestamp(
                '{url}: Found url missing the output filename'.format(
                    url=url))
            continue

        # prepare upload
        txt = make_info_page(data)
        filename = '{filename}{ext}'.format(filename=data['filename'],
                                            ext=ext)

        if test:
            pywikibot.output(
                'Test upload "{filename}" from "{url}" with the following '
                'description:\n{txt}\n'.format(filename=filename, url=url,
                                               txt=txt))
            counter += 1
            continue  # stop here if testing

        result = upload_single_file(filename, url, txt, target_site,
                                    upload_if_badprefix=True)
        if result.get('error'):
            logs['error'].write(url)
        elif result.get('warning'):
            logs['warning'].write(url)
        else:
            logs['success'].write(url)
        if verbose:
            pywikibot.output(result.get('log'))
        flog.write_w_timestamp(result.get('log'))
        counter += 1

    for log in logs.values():
        pywikibot.output(log.close_and_confirm())
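# Illustrative sketch (not from the original module): a hypothetical dry run
# of up_all_from_url() that uploads nothing (test=True), stops after five
# entries and skips one known-bad url. The path and url are made-up examples;
# the info json is expected to be keyed by url with 'info' and 'filename'
# fields, per the checks above.
def _example_up_all_from_url():
    up_all_from_url(
        'make_info_output.json',
        cutoff=5,
        test=True,
        skip=['http://example.org/broken_image.tif'])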
def test_make_info_page_no_cats(self):
    self.data['meta_cats'] = []
    self.data['cats'] = []
    expected = '{{Infobox\n| param1 = value1 \n}}'
    self.assertEqual(make_info_page(self.data), expected)
def test_make_info_page_no_content_cats(self):
    self.data['cats'] = []
    expected = ('{{Infobox\n| param1 = value1 \n}}\n\n'
                '<!-- Metadata categories -->\n'
                '[[Category:A meta_Cat]]')
    self.assertEqual(make_info_page(self.data), expected)