Example #1
def makeAndRename(hitlist, outPath):
    """
    Given a hitlist, create the info files and rename the matched files.

    @param hitlist: the output of makeHitlist
    @param outPath: the directory in which to store info + renamed files
    """
    # create outPath if it doesn't exist
    common.create_dir(outPath)

    # logfile
    logfile = os.path.join(outPath, '¤generator.log')
    flog = open(logfile, 'a', encoding='utf-8')

    for hit in hitlist:
        base_name = os.path.join(outPath, hit['data']['filename'])

        # output info file
        common.open_and_write_file('%s.info' % base_name,
                                   make_info_page(hit['data']))

        # rename/move matched file
        outfile = '%s%s' % (base_name, hit['ext'])
        os.rename(hit['path'], outfile)
        flog.write('%s|%s\n' %
                   (os.path.basename(hit['path']), os.path.basename(outfile)))
    flog.close()
    pywikibot.output('Created %s' % logfile)
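
Every example on this page funnels directory creation through common.create_dir. The toolkit's own implementation is not shown here; as a rough, assumed sketch (not the toolkit's actual code), such a helper usually amounts to a guarded os.makedirs:

import os


def create_dir(out_path):
    """Hypothetical sketch of a create_dir helper; not the toolkit's actual code."""
    if not out_path:
        raise ValueError('Cannot create directory without a name.')
    if not os.path.exists(out_path):
        os.makedirs(out_path)
    elif not os.path.isdir(out_path):
        raise OSError('%s exists but is not a directory.' % out_path)
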
def load_mappings(update_mappings, mappings_dir=None, load_mapping_lists=None):
    """
    Update mapping files, load these and package appropriately.

    :param update_mappings: whether to first download the latest mappings
    :param mappings_dir: path to directory in which mappings are found
    :param load_mapping_lists: the root path to any mapping_lists which should
        be loaded.
    """
    mappings = {}
    mappings_dir = mappings_dir or MAPPINGS_DIR
    common.create_dir(mappings_dir)  # ensure it exists

    expeditions_file = path.join(mappings_dir, 'expeditions.json')
    museums_file = path.join(mappings_dir, 'museums.json')

    # static files
    mappings['expeditions'] = common.open_and_read_file(expeditions_file,
                                                        as_json=True)
    mappings['museums'] = common.open_and_read_file(museums_file, as_json=True)

    if load_mapping_lists:
        load_mapping_lists_mappings(mappings_dir, update_mappings, mappings,
                                    load_mapping_lists)

    pywikibot.output('Loaded all mappings')
    return mappings
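
A brief usage sketch, assuming expeditions.json and museums.json already exist in the mappings directory (this variant only reads the static files regardless of update_mappings):

mappings = load_mappings(update_mappings=False)
expeditions = mappings['expeditions']  # contents of expeditions.json
museums = mappings['museums']          # contents of museums.json
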
Example #3
def rename(base_dir, sub_cat, in_filename, log_file='move.log'):
    """
    Identify any files to replace and rename them to their commons names.

    :param base_dir: Path to directory in which replacement image files are
        found.
    :param sub_cat: The name of the subdirectory into which processed files
        should be moved.
    :param in_filename: The photoAll.csv file filtered to only contain the
        files to replace.
    :param log_file: The name of the log file to be created (in base_dir).
    """
    # Load indata
    in_filename = common.modify_path(base_dir, in_filename)
    header_check = u'PhoId|PhoObjId|PhoBeschreibungM|PhoAufnahmeortS|' \
                   u'PhoSwdS|MulId|filnamn|AdrVorNameS|AdrNameS|PhoSystematikS'
    data = csv.csv_file_to_dict(in_filename,
                                "filnamn",
                                header_check,
                                keep=('PhoSystematikS', 'filnamn'),
                                delimiter='|',
                                codec='utf-16')

    # reformat the commons filenames
    url_prefix = u'https://commons.wikimedia.org/wiki/File:'
    for k, v in data.iteritems():
        if v['PhoSystematikS'].startswith(url_prefix):
            data[k] = v['PhoSystematikS'][len(url_prefix):]
        else:
            pywikibot.output("error in indatafile: %s, %s" % (k, v))

    # find candidate files
    candidates = prep.find_files(base_dir, ('.tif', ), subdir=False)

    # rename the files
    sub_cat = common.modify_path(base_dir, sub_cat)
    log_file = common.modify_path(base_dir, log_file)
    common.create_dir(sub_cat)
    log = []

    for candidate in candidates:
        base_name = os.path.basename(candidate)
        if base_name not in data.keys():
            log.append('%s not found in csv file' % base_name)
            continue

        commons_name = data.pop(base_name)
        commons_name = common.modify_path(sub_cat, commons_name)
        os.rename(candidate, commons_name)

    for k in data.keys():
        log.append('%s not found on disk' % k)

    common.open_and_write_file(log_file, '\n'.join(log), codec='utf-8')
    pywikibot.output(u'Created %s' % log_file)
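
A hypothetical invocation (the paths are made up): base_dir holds the replacement .tif files, and in_filename is the filtered photoAll.csv in pipe-delimited UTF-16 matching the header_check above.

rename(base_dir='/data/lsh_replacements',     # hypothetical directory with the .tif files
       sub_cat='processed',                   # created inside base_dir
       in_filename='photoAll_filtered.csv',   # pipe-delimited, UTF-16
       log_file='move.log')
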
Example #4
def rename(base_dir, sub_cat, in_filename, log_file='move.log'):
    """
    Identify any files to replace and rename them to their commons names.

    :param base_dir: Path to directory in which replacement image files are
        found.
    :param sub_cat: The name of the subdirectory into which processed files
        should be moved.
    :param in_filename: The photoAll.csv file filtered to only contain the
        files to replace.
    :param log_file: The name of the log file to be created (in base_dir).
    """
    # Load indata
    in_filename = common.modify_path(base_dir, in_filename)
    header_check = u'PhoId|PhoObjId|PhoBeschreibungM|PhoAufnahmeortS|' \
                   u'PhoSwdS|MulId|filnamn|AdrVorNameS|AdrNameS|PhoSystematikS'
    data = csv.csv_file_to_dict(in_filename, "filnamn", header_check,
                                keep=('PhoSystematikS', 'filnamn'),
                                delimiter='|', codec='utf-16')

    # reformat the commons filenames
    url_prefix = u'https://commons.wikimedia.org/wiki/File:'
    for k, v in data.iteritems():
        if v['PhoSystematikS'].startswith(url_prefix):
            data[k] = v['PhoSystematikS'][len(url_prefix):]
        else:
            pywikibot.output("error in indatafile: %s, %s" % (k, v))

    # find candidate files
    candidates = prep.find_files(base_dir, ('.tif', ), subdir=False)

    # rename the files
    sub_cat = common.modify_path(base_dir, sub_cat)
    log_file = common.modify_path(base_dir, log_file)
    common.create_dir(sub_cat)
    log = []

    for candidate in candidates:
        base_name = os.path.basename(candidate)
        if base_name not in data.keys():
            log.append('%s not found in csv file' % base_name)
            continue

        commons_name = data.pop(base_name)
        commons_name = common.modify_path(sub_cat, commons_name)
        os.rename(candidate, commons_name)

    for k in data.keys():
        log.append('%s not found on disk' % k)

    common.open_and_write_file(log_file, '\n'.join(log), codec='utf-8')
    pywikibot.output(u'Created %s' % log_file)
def move_matching_files(files, data, out_dir):
    """Rename and move matching files.

    @param files: output from find_all_files
    @param data: the data loaded from the csv
    @param out_dir: path of the directory where output files should be placed
    """
    common.create_dir(out_dir)
    for key, in_path in files.iteritems():
        if key in data.keys():
            path_name, ext = os.path.splitext(in_path)
            file_name_out = u'{}{}'.format(data[key][u'Commons'], ext)
            out_path = common.modify_path(out_dir, file_name_out)
            os.rename(in_path, out_path)
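
The expected input shapes can be read off the code: files maps a key to a path on disk, and data maps the same key to a csv row carrying the Commons filename. A minimal illustration with made-up values:

files = {u'0001': u'/indir/0001.tif'}                    # as returned by find_all_files
data = {u'0001': {u'Commons': u'Example_object_0001'}}   # as loaded from the csv
move_matching_files(files, data, out_dir=u'/outdir')
# assuming common.modify_path joins directory and filename,
# /indir/0001.tif ends up as /outdir/Example_object_0001.tif
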
Example #6
    def __init__(self,
                 page,
                 parameters,
                 header_template=None,
                 row_template_name='User:André Costa (WMSE)/mapping-row',
                 site=None,
                 mapping_dir=None,
                 wikitext_dir=None,
                 options=None):
        """
        Initialise a mapping list.

        @param page: name of page (incl. prefixes) where list (should) live.
        @param parameters: a list of mapping keys in data to use as parameters
            or an OrderedDict with {data key: template parameter} pairs.
        @param header_template: the header template (incl. any parameters) and
            "{{ }}".
        @param row_template_name: the name of the row template.
            (Default: User:André Costa (WMSE)/mapping-row)
        @param mapping_dir: Directory in which to save scraped mappings.
            (Default: OUT_PATH)
        @param wikitext_dir: Directory in which to save created wikitext
            mapping lists. (Default: OUT_PATH)
        @param site: pywikibot.Site on which the page lives.
            (Default: Wikimedia Commons)
        @param options: dict of overriding option settings.
        """
        self.options = self.set_options(options)
        self.page_name = page.rpartition('/')[2]
        self.page = MappingList.construct_page(site, page)
        self.header_template = header_template
        self.row_template = row_template_name
        self.mapping_dir = mapping_dir or OUT_PATH
        self.wikitext_dir = wikitext_dir or OUT_PATH

        # store as dict internally for uniform handling
        if isinstance(parameters, list):
            self.parameters = OrderedDict([(k, k) for k in parameters])
        else:
            self.parameters = parameters

        # create out_paths if they don't exist
        common.create_dir(self.mapping_dir)
        common.create_dir(self.wikitext_dir)
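
A hedged construction example (the page name is invented): parameters may be passed either as a plain list, in which case each key doubles as the template parameter, or as an OrderedDict of {data key: template parameter} pairs; the constructor normalises the list form to an OrderedDict.

from collections import OrderedDict

ml_simple = MappingList('Commons:SomeProject/mappings/keywords',
                        parameters=['name', 'category', 'frequency'])
ml_mapped = MappingList('Commons:SomeProject/mappings/keywords',
                        parameters=OrderedDict([('name', 'param1'),
                                                ('category', 'param2')]))
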
Example #7
def upload_all(base_dir,
               sub_dir=u'Uploaded',
               log_file='upload.log',
               verbose=True):
    """
    Upload the renamed files.

    We cannot just use uploader.up_all since there are no corresponding .info
    files.
    """
    commons = pywikibot.Site('commons', 'commons')
    commons.login()

    upload_comment = u'Source image improved by the institution #LSH'

    # create target directories if they don't exist
    done_dir = common.modify_path(base_dir, sub_dir)
    error_dir = u'%s_errors' % done_dir
    warnings_dir = u'%s_warnings' % done_dir
    common.create_dir(done_dir)
    common.create_dir(error_dir)
    common.create_dir(warnings_dir)

    # logfile
    logfile = common.modify_path(base_dir, log_file)
    flog = codecs.open(logfile, 'a', 'utf-8')

    # find candidate files
    media_files = prep.find_files(base_dir, ('.tif', ), subdir=False)
    for media_file in media_files:
        file_name = os.path.basename(media_file)
        target_dir = None
        result = uploader.upload_single_file(file_name,
                                             media_file,
                                             upload_comment,
                                             commons,
                                             overwrite_page_exists=True)

        if result.get('error'):
            target_dir = error_dir
        elif result.get('warning'):
            target_dir = warnings_dir
        else:
            target_dir = done_dir
        if verbose:
            pywikibot.output(result.get('log'))

        flog.write(u'%s\n' % result.get('log'))
        os.rename(media_file, common.modify_path(target_dir, file_name))
        flog.flush()

    flog.close()
    pywikibot.output(u'Created %s' % logfile)
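
A hypothetical call; after the run each .tif found directly in base_dir has been moved into Uploaded, Uploaded_errors or Uploaded_warnings under that directory, and upload.log records one result line per file.

upload_all(u'/data/lsh_improved',   # hypothetical directory with the improved .tif files
           sub_dir=u'Uploaded',
           log_file='upload.log',
           verbose=True)
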
Example #8
def upload_all(base_dir, sub_dir=u'Uploaded', log_file='upload.log',
               verbose=True):
    """
    Upload the renamed files.

    We cannot just use uploader.up_all since there are no corresponding .info
    files.
    """
    commons = pywikibot.Site('commons', 'commons')
    commons.login()

    upload_comment = u'Source image improved by the institution #LSH'

    # create target directories if they don't exist
    done_dir = common.modify_path(base_dir, sub_dir)
    error_dir = u'%s_errors' % done_dir
    warnings_dir = u'%s_warnings' % done_dir
    common.create_dir(done_dir)
    common.create_dir(error_dir)
    common.create_dir(warnings_dir)

    # logfile
    logfile = common.modify_path(base_dir, log_file)
    flog = codecs.open(logfile, 'a', 'utf-8')

    # find candidate files
    media_files = prep.find_files(base_dir, ('.tif', ), subdir=False)
    for media_file in media_files:
        file_name = os.path.basename(media_file)
        target_dir = None
        result = uploader.upload_single_file(
            file_name, media_file, upload_comment, commons,
            overwrite_page_exists=True)

        if result.get('error'):
            target_dir = error_dir
        elif result.get('warning'):
            target_dir = warnings_dir
        else:
            target_dir = done_dir
        if verbose:
            pywikibot.output(result.get('log'))

        flog.write(u'%s\n' % result.get('log'))
        os.rename(media_file, common.modify_path(target_dir, file_name))
        flog.flush()

    flog.close()
    pywikibot.output(u'Created %s' % logfile)
Example #9
def load_mappings(update_mappings, mappings_dir=None,
                  load_mapping_lists=None):
    """
    Update mapping files, load these and package appropriately.

    :param update_mappings: whether to first download the latest mappings
    :param mappings_dir: path to directory in which mappings are found
    :param load_mapping_lists: the root path to any mapping_lists which should
        be loaded.
    """
    mappings = {}
    mappings_dir = mappings_dir or MAPPINGS_DIR
    common.create_dir(mappings_dir)  # ensure it exists

    parish_file = os.path.join(mappings_dir, 'socken.json')
    muni_file = os.path.join(mappings_dir, 'kommun.json')
    county_file = os.path.join(mappings_dir, 'lan.json')
    province_file = os.path.join(mappings_dir, 'province.json')
    country_file = os.path.join(mappings_dir, 'country.json')

    if update_mappings:
        query_props = {'P373': 'commonscat'}
        lang = 'sv'
        mappings['parish'] = query_to_lookup(
            build_query('P777', optional_props=query_props.keys(), lang=lang),
            props=query_props, lang=lang)
        mappings['municipality'] = query_to_lookup(
            build_query('P525', optional_props=query_props.keys(), lang=lang),
            props=query_props, lang=lang)
        mappings['county'] = query_to_lookup(
            build_query('P507', optional_props=query_props.keys(), lang=lang),
            props=query_props, lang=lang)

        # dump to mappings
        common.open_and_write_file(
            parish_file, mappings['parish'], as_json=True)
        common.open_and_write_file(
            muni_file, mappings['municipality'], as_json=True)
        common.open_and_write_file(
            county_file, mappings['county'], as_json=True)

    else:
        mappings['parish'] = common.open_and_read_file(
            parish_file, as_json=True)
        mappings['municipality'] = common.open_and_read_file(
            muni_file, as_json=True)
        mappings['county'] = common.open_and_read_file(
            county_file, as_json=True)

    # static files
    mappings['province'] = common.open_and_read_file(
        province_file, as_json=True)
    mappings['country'] = common.open_and_read_file(
        country_file, as_json=True)

    if load_mapping_lists:
        load_mapping_lists_mappings(
            mappings_dir, update_mappings, mappings, load_mapping_lists)

    pywikibot.output('Loaded all mappings')
    return mappings
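
A usage sketch: with update_mappings=True the parish, municipality and county lookups are rebuilt from Wikidata and written to the JSON files; with False the cached files are read instead. Either way the returned dict exposes the same keys:

mappings = load_mappings(update_mappings=False)
parish_lookup = mappings['parish']              # from socken.json (Wikidata P777)
municipality_lookup = mappings['municipality']  # from kommun.json (Wikidata P525)
county_lookup = mappings['county']              # from lan.json (Wikidata P507)
province_lookup = mappings['province']          # static province.json
country_lookup = mappings['country']            # static country.json
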
Example #10
def up_all_from_url(info_path,
                    cutoff=None,
                    target='upload_logs',
                    file_exts=None,
                    verbose=False,
                    test=False,
                    target_site=None,
                    only=None,
                    skip=None):
    """
    Upload all images provided as urls in a make_info json file.

    Media (image) files and metadata files with the expected extension .info
    should be in the same directory. Metadata files should contain the entirety
    of the desired description page (in wikitext).

    Outputs separate logfiles for files triggering errors or warnings (and for
    successful uploads) so that these can be used in later runs.

    @param info_path: path to the make_info json file
    @param cutoff: number of files to upload (defaults to all)
    @param target: sub-directory for log files (defaults to "upload_logs")
    @param file_exts: tuple of allowed file extensions (defaults to FILE_EXTS)
    @param verbose: whether to output confirmation after each upload
    @param test: set to True to test but not upload
    @param target_site: pywikibot.Site to which file should be uploaded,
        defaults to Commons.
    @param only: list of urls to upload; if provided, all others are skipped
    @param skip: list of urls to skip; all others are uploaded
    """
    # set defaults unless overridden
    file_exts = file_exts or FILE_EXTS
    target_site = target_site or pywikibot.Site('commons', 'commons')
    target_site.login()

    # load info file
    info_datas = common.open_and_read_file(info_path, as_json=True)

    # create target directory if it doesn't exist
    output_dir = os.path.join(os.path.dirname(info_path), target)
    common.create_dir(output_dir)

    # create all log files
    logs = {
        'success': common.LogFile(output_dir, 'success.log'),
        'warning': common.LogFile(output_dir, 'warnings.log'),
        'error': common.LogFile(output_dir, 'errors.log'),
        'general': common.LogFile(output_dir, 'uploader.log')
    }

    # shortcut to the general/verbose logfile
    flog = logs['general']

    # filtering based on entries in only/skip
    kill_list = set()
    if only:
        kill_list |= set(info_datas.keys()) - set(only)  # difference
    if skip:
        kill_list |= set(info_datas.keys()) & set(skip)  # intersection
    for key in kill_list:
        del info_datas[key]
    flog.write_w_timestamp('{} files remain to upload after filtering'.format(
        len(info_datas)))

    counter = 1
    for url, data in info_datas.items():
        if cutoff and counter > cutoff:
            break

        # verify that the file extension is ok
        try:
            ext = verify_url_file_extension(url, file_exts)
        except common.MyError as e:
            flog.write_w_timestamp(e)
            continue

        # verify that info and output filenames are provided
        if not data['info']:
            flog.write_w_timestamp(
                '{url}: Found url missing the info field (at least)'.format(
                    url=url))
            continue
        elif not data['filename']:
            flog.write_w_timestamp(
                '{url}: Found url missing the output filename'.format(url=url))
            continue

        # prepare upload
        txt = make_info_page(data)
        filename = '{filename}{ext}'.format(filename=data['filename'], ext=ext)

        if test:
            pywikibot.output(
                'Test upload "{filename}" from "{url}" with the following '
                'description:\n{txt}\n'.format(filename=filename,
                                               url=url,
                                               txt=txt))
            counter += 1
            continue
        # stop here if testing

        result = upload_single_file(filename,
                                    url,
                                    txt,
                                    target_site,
                                    upload_if_badprefix=True)
        if result.get('error'):
            logs['error'].write(url)
        elif result.get('warning'):
            logs['warning'].write(url)
        else:
            logs['success'].write(url)
        if verbose:
            pywikibot.output(result.get('log'))

        flog.write_w_timestamp(result.get('log'))
        counter += 1

    for log in logs.values():
        pywikibot.output(log.close_and_confirm())
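
The make_info json file is keyed on source urls; from the code above each entry needs at least a filename and an info field. A hypothetical file shape and invocation (all names and urls are made up):

# assumed shape of the make_info json file, keyed on source url:
# {
#     "http://example.org/images/1234.tif": {
#         "filename": "Some object (1234)",
#         "info": "..."   # description data consumed by make_info_page()
#     }
# }
up_all_from_url('/data/batch/make_info.json',   # hypothetical path
                cutoff=5,                        # stop after five files
                test=True,                       # print descriptions, upload nothing
                skip=['http://example.org/images/9999.tif'])
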
Example #11
def up_all(in_path,
           cutoff=None,
           target='Uploaded',
           file_exts=None,
           verbose=False,
           test=False,
           target_site=None,
           chunked=True):
    """
    Upload all matched files in the supplied directory.

    Media (image) files and metadata files with the expected extension .info
    should be in the same directory. Metadata files should contain the entirety
    of the desired description page (in wikitext).

    Moves each file to one of the target folders after processing.

    @param in_path: path to directory with files to upload
    @param cutoff: number of files to upload (defaults to all)
    @param target: sub-directory for uploaded files (defaults to "Uploaded")
    @param file_exts: tuple of allowed file extensions (defaults to FILE_EXTS)
    @param verbose: whether to output confirmation after each upload
    @param test: set to True to test but not upload
    @param target_site: pywikibot.Site to which file should be uploaded,
        defaults to Commons.
    @param chunked: Whether to do chunked uploading or not.
    """
    # set defaults unless overridden
    file_exts = file_exts or FILE_EXTS
    target_site = target_site or pywikibot.Site('commons', 'commons')
    target_site.login()

    # Verify in_path
    if not os.path.isdir(in_path):
        pywikibot.output('The provided in_path was not a valid '
                         'directory: %s' % in_path)
        exit()

    # create target directories if they don't exist
    done_dir = os.path.join(in_path, target)
    error_dir = '%s_errors' % done_dir
    warnings_dir = '%s_warnings' % done_dir
    common.create_dir(done_dir)
    common.create_dir(error_dir)
    common.create_dir(warnings_dir)

    # logfile
    flog = common.LogFile(in_path, '¤uploader.log')

    # find all content files
    found_files = prepUpload.find_files(path=in_path,
                                        file_exts=file_exts,
                                        subdir=False)
    counter = 1
    for f in found_files:
        if cutoff and counter > cutoff:
            break
        # verify that there is a matching info file
        info_file = '%s.info' % os.path.splitext(f)[0]
        base_name = os.path.basename(f)
        base_info_name = os.path.basename(info_file)
        if not os.path.exists(info_file):
            flog.write_w_timestamp(
                '{0}: Found multimedia file without info'.format(base_name))
            continue

        # prepare upload
        txt = common.open_and_read_file(info_file)

        if test:
            pywikibot.output('Test upload "%s" with the following '
                             'description:\n%s\n' % (base_name, txt))
            counter += 1
            continue
        # stop here if testing

        target_dir = None
        result = upload_single_file(base_name,
                                    f,
                                    txt,
                                    target_site,
                                    upload_if_badprefix=True,
                                    chunked=chunked)
        if result.get('error'):
            target_dir = error_dir
        elif result.get('warning'):
            target_dir = warnings_dir
        else:
            target_dir = done_dir
        if verbose:
            pywikibot.output(result.get('log'))

        flog.write_w_timestamp(result.get('log'))
        os.rename(f, os.path.join(target_dir, base_name))
        os.rename(info_file, os.path.join(target_dir, base_info_name))
        counter += 1

    pywikibot.output(flog.close_and_confirm())
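
A dry-run sketch: pointing up_all at a directory of media files with matching .info files and test=True prints each would-be description page without uploading anything (the target sub-directories are still created):

up_all('/data/batch_ready',   # hypothetical directory with media + .info files
       cutoff=3,              # only consider the first three files
       test=True)             # print the description pages; upload nothing
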