def _print_field_statistics(entries, fields):
    """
    Prints one line per field summarizing the values that field takes across all entries.

    The summary itself is produced by utils.unique_elements_and_occurrences, whose result is joined with ', '.

    :param entries: List of entry dictionaries.
    :param fields: Iterable of field names to report on (in output order).
    """
    for field in fields:
        # a field value may be a single item or a list of items, collect all of them in one flat list
        flat_content = []
        for entry in entries:
            if field not in entry:
                continue
            content = entry[field]
            if isinstance(content, list):
                flat_content.extend(content)
            else:
                flat_content.append(content)
        statistics = utils.unique_elements_and_occurrences(flat_content)
        print('{}: {}'.format(field, ', '.join(statistics)))


def clean_lgw_content():
    """
    Cleans the imported LGW entries.

    Reads '_lgw.json' from the 'code/lgw-import' folder below constants.root_path, renames the keys, drops the
    ignored keys, normalizes the content of several fields and writes the result to '_lgw.cleaned.json' in the
    same folder. Along the way the unique and the mandatory field names as well as the field contents before and
    after the normalization are printed.
    """

    # paths
    import_path = os.path.join(constants.root_path, 'code', 'lgw-import')
    entries_file = os.path.join(import_path, '_lgw.json')
    cleaned_entries_file = os.path.join(import_path, '_lgw.cleaned.json')

    # load entries
    text = utils.read_text(entries_file)
    entries = json.loads(text)

    # rename keys: (new key, old keys), only the first old key found in an entry is taken over
    key_replacements = (
        ('developer', ('Developer', 'Developers')),
        ('code license', ('Code license', 'Code licenses')),
        ('engine', ('Engine', 'Engines')),
        ('genre', ('Genre', 'Genres')),
        ('library', ('Library', 'Libraries')),
        ('assets license', ('Media license', 'Media licenses')),
        ('code language', ('P. language', 'P. languages')),
        ('home', ('Homepage', )),
        ('platform', ('Platforms', )),
        ('tracker', ('Bug/Feature Tracker', )),
        ('repo', ('Source Code', )),
        ('forum', ('Forum', )),
        ('chat', ('Chat', )),
        ('origin', ('Origin', )),
        ('dev home', ('Development Project', )),
        ('last active', ('Release date', )),
    )
    # entries are modified in place, no need to write them back into the list
    for entry in entries:
        for new_key, old_keys in key_replacements:
            for key in old_keys:
                if key in entry:
                    entry[new_key] = entry.pop(key)
                    break

    # ignore keys (note: '\xa0' is a non-breaking space)
    ignored_keys = ('origin', 'Latest\xa0release')
    for entry in entries:
        for key in ignored_keys:
            entry.pop(key, None)

    # check for unique field names
    unique_fields = set()
    for entry in entries:
        unique_fields.update(entry)
    print('unique lgw fields: {}'.format(sorted(unique_fields)))

    # which fields are mandatory (those that are present in every entry)
    mandatory_fields = unique_fields.copy()
    for entry in entries:
        mandatory_fields.intersection_update(entry)
    print('mandatory lgw fields: {}'.format(sorted(mandatory_fields)))

    # fields for which content statistics are printed (the field names do not change below, so compute them once)
    fields_without_statistics = {
        'description', 'external links', 'dev home', 'forum', 'home',
        'linux-packages', 'developer', 'chat', 'tracker', 'Latest release',
        'name', 'repo', 'Release date', 'categories'
    }
    fields = sorted(unique_fields - fields_without_statistics)

    # statistics before
    print('field contents before')
    _print_field_statistics(entries, fields)

    # content replacements (the order of the calls is kept, later steps may see the output of earlier ones)
    license_fields = ('code license', 'assets license')
    entries = remove_parenthized_content(
        entries, ('assets license', 'code language', 'code license', 'engine',
                  'genre', 'last active', 'library'))
    entries = remove_prefix_suffix(entries, license_fields, ('"', 'GNU'),
                                   ('"', '[3]', '[2]', '[1]', 'only'))
    # (replacement, contents to be replaced), applied in this order
    license_replacements = (
        ('GPL', ('General Public License', )),
        # for LGW GPLv2 would be the correct writing
        ('GPL-2.0', ('GPLv2', )),
        ('GPL-2', ('GPLv2', 'GPL v2', 'GPL version 2.0', 'GPL 2.0',
                   'General Public License v2', 'GPL version 2', 'Gplv2',
                   'GPL 2')),
        ('GPL-2', ('GPL v2 or later', 'GPL 2+', 'GPL v2+',
                   'GPL version 2 or later')),
        # for LGW GPLv3 would be the correct writing
        ('GPL-3.0', ('GPLv3', )),
        ('GPL-3', ('GPL v3', 'GNU GPL v3', 'GPL 3')),
        ('GPL-3', ('GPL v3+', 'GPL v.3 or later', 'GPL v3 or later')),
        ('Public domain', ('public domain', 'Public Domain')),
        ('zlib', ('zlib/libpng license', 'Zlib License')),
        ('BSD', ('Original BSD License', )),
        ('CC-BY-SA-3.0',
         ('Creative Commons Attribution-ShareAlike 3.0 Unported License',
          'CC-BY-SA 3.0', 'CC BY-SA 3.0')),
        ('CC-BY-SA', ('CC BY-SA', )),
        ('MIT', ('MIT License', 'MIT"')),
    )
    for replacement, originals in license_replacements:
        entries = replace_content(entries, license_fields, replacement,
                                  originals)
    entries = replace_content(entries, 'platform', 'macOS', ('Mac', ))
    entries = remove_prefix_suffix(entries, ('code language', 'developer'), (),
                                   ('[3]', '[2]', '[1]'))
    entries = ignore_content(entries, 'code language',
                             ('HTML5', 'HTML', 'English', 'XML', 'WML'))
    entries = replace_content(entries, 'code language', 'Lua', ('lua', 'LUA'))
    entries = remove_prefix_suffix(entries, 'genre', (), ('game', 'games'))
    entries = lower_case_content(entries, 'genre')
    # (replacement, contents to be replaced), applied in this order
    genre_replacements = (
        ('platform', ('platformer', )),
        ('role playing', ('rpg', )),
        ('first person, shooter', ('fps', )),
        ('real time, strategy', ('rts', )),
        ('turn based, strategy', ('tbs', )),
    )
    for replacement, originals in genre_replacements:
        entries = replace_content(entries, 'genre', replacement, originals)
    entries = ignore_content(
        entries, 'categories',
        ('GPL', 'C++', 'C', 'ECMAScript', 'Python', 'Java', 'CC BY-SA', 'Lua',
         'LGPL', 'CC-BY', 'BSD', 'MIT', 'Qt', 'SDL', 'OpenGL', 'Pygame', 'PD',
         'GLUT', 'Haskell', 'Allegro', 'Ruby', 'Zlib/libpng', 'OpenAL', 'Perl',
         'Free Pascal', 'LÖVE', 'HTML5', 'Id Tech 1'))
    entries = replace_content(entries, 'library', 'pygame', ('Pygame', ))
    entries = replace_content(entries, 'library', 'Qt', ('QT', ))
    entries = ignore_content(entries, 'library',
                             ('C++', 'Lua', 'Mozilla Firefox'))
    entries = ignore_nonnumbers(entries, 'last active')
    entries = ignore_content(entries, 'last active', ('2019', ))
    entries = ignore_content(entries, 'platform', ('DOS', ))

    # list for every unique field
    print('\nfield contents after')
    _print_field_statistics(entries, fields)

    # save entries
    text = json.dumps(entries, indent=1)
    utils.write_text(cleaned_entries_file, text)
        osgc_fields.update(osgc_entry.keys())
    osgc_fields = sorted(list(osgc_fields))
    print('Unique osgc-fields\n {}'.format(', '.join(osgc_fields)))

    for field in osgc_fields:
        if field in ('video', 'feed', 'url', 'repo', 'info', 'updated', 'images', 'name', 'originals'):
            continue
        osgc_content = [entry[field] for entry in osgc_entries if field in entry]
        # flatten
        flat_content = []
        for content in osgc_content:
            if isinstance(content, list):
                flat_content.extend(content)
            else:
                flat_content.append(content)
        statistics = u.unique_elements_and_occurrences(flat_content)
        statistics.sort(key=str.casefold)
        print('{}: {}'.format(field, ', '.join(statistics)))

    # eliminate the ignored or rejected entries from them
    # TODO for rejected entries we should actually have a test that also checks for the URLs because names could be not unique
    _ = [x['name'] for x in osgc_entries if x['name'] in osgc_ignored_entries + our_rejected_entries]  # those that will be ignored
    _ = set(osgc_ignored_entries) - set(_)  # those that shall be ignored minus those that will be ignored
    if _:
        print('Can un-ignore {} because not contained anymore in osgc with this name.'.format(_))
    osgc_entries = [x for x in osgc_entries if x['name'] not in osgc_ignored_entries + our_rejected_entries]

    # fix names and licenses (so they are no longer detected as deviations downstream)
    _ = [x['name'] for x in osgc_entries if x['name'] in osgc_name_aliases.keys()]  # those that will be renamed
    _ = set(osgc_name_aliases.keys()) - set(_)  # those that shall be renamed minus those that will be renamed
    if _: