示例#1
0
            if mangled_rel_artist in page:
                found_artists.append(rel_artist)
        if (found_artists):
            reasons.append(join_names('related artist', found_artists))
            out(' * has related artists: %s, found related artists: %s' %
                (len(artists), len(found_artists)))

        # Determine if artist matches
        if not found_albums and not found_works and not found_artists and not found_urls:
            continue

        # Check if wikipedia lang is compatible with artist country
        if wp_lang != 'en' or wp_lang in acceptable_countries_for_lang:
            if wp_lang not in acceptable_countries_for_lang:
                continue
            country, country_reasons = determine_country(wikipage)
            if (country not in acceptable_countries_for_lang[wp_lang]):
                colored_out(
                    bcolors.HEADER,
                    ' * artist country (%s) not compatible with wiki language (%s)'
                    % (country, wp_lang))
                continue

        wp_url = 'http://%s.wikipedia.org/wiki/%s' % (
            wp_lang,
            quote_page_title(page_title),
        )
        wd_url = 'http://www.wikidata.org/wiki/%s' % wikipage.wikidata_id.upper(
        )
        text = 'Wikidata identifier found from matching Wikipedia page %s. The page mentions %s.' % (
            wp_url, ', '.join(reasons))
def main():
    """Fill in missing artist attributes from each artist's linked wiki page.

    Iterates over the rows returned by ``db.execute(query)`` and handles every
    artist id at most once per run.  For each artist the page at
    ``artist['url']`` is fetched and used to determine whichever of area,
    type, gender, begin date and end date are still empty on the row.  When
    at least one field was determined, the artist is edited through
    ``mb.edit_artist`` with an edit note listing the reasons for each field.

    Whether or not an edit was made, the artist is then recorded in
    ``bot_wp_artist_data``: a new row is inserted when ``artist['processed']``
    is ``None``, otherwise the existing row's ``processed`` timestamp is
    refreshed.

    ``wp_lang`` is not assigned in this function; it is read from the
    enclosing (module) scope.
    """
    seen = set()
    for artist in db.execute(query):
        # Skip artist ids that were already handled earlier in this run.
        if artist['id'] in seen:
            continue
        seen.add(artist['id'])
        colored_out(
            bcolors.OKBLUE,
            'Looking up artist "%s" http://musicbrainz.org/artist/%s' %
            (artist['name'], artist['gid']))
        out(' * wiki:', artist['url'])

        # Work on a dict copy so the determined values can be written into it.
        artist = dict(artist)
        update = set()  # names of the fields filled in for this artist
        reasons = []  # (label, reasons) pairs, in order, for the edit note

        page = WikiPage.fetch(artist['url'], False)

        if not artist['area']:
            country, country_reasons = determine_country(page)
            if country:
                artist['area'] = country_ids[country]
                update.add('area')
                reasons.append(('COUNTRY', country_reasons))

        if not artist['type']:
            # Named artist_type so the builtin ``type`` is not shadowed.
            artist_type, type_reasons = determine_type(page)
            if artist_type:
                artist['type'] = artist_type_ids[artist_type]
                update.add('type')
                reasons.append(('TYPE', type_reasons))

        # Runs after the type lookup on purpose: a type determined just above
        # already counts here.
        # NOTE(review): type id 1 is presumably "Person" - confirm against
        # artist_type_ids.
        if not artist['gender'] and artist['type'] == 1:
            gender, gender_reasons = determine_gender(page)
            if gender:
                artist['gender'] = gender_ids[gender]
                update.add('gender')
                reasons.append(('GENDER', gender_reasons))

        is_performance_name = False
        if artist['type'] == 1 and CHECK_PERFORMANCE_NAME:
            is_performance_name = db.execute(performance_name_query,
                                             artist['id']).scalar() > 0
            out(" * checking for performance name", is_performance_name)

        if not artist['begin_date_year']:
            begin_date, begin_date_reasons = determine_begin_date(
                artist, page, is_performance_name)
            if begin_date['year']:
                colored_out(bcolors.OKGREEN, " * new begin date:", begin_date)
                artist['begin_date_year'] = begin_date['year']
                artist['begin_date_month'] = begin_date['month']
                artist['begin_date_day'] = begin_date['day']
                update.add('begin_date')
                reasons.append(('BEGIN DATE', begin_date_reasons))
        if not artist['end_date_year']:
            end_date, end_date_reasons = determine_end_date(
                artist, page, is_performance_name)
            if end_date['year']:
                colored_out(bcolors.OKGREEN, " * new end date:", end_date)
                artist['end_date_year'] = end_date['year']
                artist['end_date_month'] = end_date['month']
                artist['end_date_day'] = end_date['day']
                update.add('end_date')
                reasons.append(('END DATE', end_date_reasons))

        if update:
            # Assemble the note in one join instead of repeated string +=.
            note_parts = ['From %s' % (artist['url'], )]
            note_parts.extend('\n\n%s:\n%s' % (field, ' '.join(reason))
                              for field, reason in reasons)
            edit_note = ''.join(note_parts)
            out(' * edit note:', edit_note.replace('\n', ' '))
            # NOTE(review): fixed 10 s pause before submitting the edit,
            # presumably to throttle requests - confirm.
            time.sleep(10)
            mb.edit_artist(artist, update, edit_note)

        if artist['processed'] is None:
            db.execute(
                "INSERT INTO bot_wp_artist_data (gid, lang) VALUES (%s, %s)",
                (artist['gid'], wp_lang))
        else:
            db.execute(
                "UPDATE bot_wp_artist_data SET processed = now() WHERE (gid, lang) = (%s, %s)",
                (artist['gid'], wp_lang))
            mangled_rel_artist = mangle_name(rel_artist)
            if mangled_rel_artist in page:
                found_artists.append(rel_artist)
        if (found_artists):
            reasons.append(join_names('related artist', found_artists))
            out(' * has related artists: %s, found related artists: %s' % (len(artists), len(found_artists)))

        # Determine if artist matches
        if not found_albums and not found_works and not found_artists and not found_urls:
            continue

        # Check if wikipedia lang is compatible with artist country
        if wp_lang != 'en' or wp_lang in acceptable_countries_for_lang:
            if wp_lang not in acceptable_countries_for_lang:
                continue
            country, country_reasons = determine_country(wikipage)
            if (country not in acceptable_countries_for_lang[wp_lang]):
                colored_out(bcolors.HEADER, ' * artist country (%s) not compatible with wiki language (%s)' % (country, wp_lang))
                continue

        wp_url = 'https://%s.wikipedia.org/wiki/%s' % (wp_lang, quote_page_title(page_title),)
        wd_url = 'https://www.wikidata.org/wiki/%s' % wikipage.wikidata_id.upper()
        text = 'Wikidata identifier found from matching Wikipedia page %s. The page mentions %s.' % (wp_url, ', '.join(reasons))
        colored_out(bcolors.OKGREEN, ' * linking to %s' % (wd_url,))
        out(' * edit note: %s' % (text,))
        time.sleep(60)
        mb.add_url("artist", artist['gid'], 352, wd_url, text)
        break

    if artist['processed'] is None:
        db.execute("INSERT INTO bot_wp_artist_link (gid, lang) VALUES (%s, %s)", (artist['gid'], wp_lang))
def main():
    """Fill in missing artist attributes from each artist's linked wiki page.

    Iterates over the rows returned by ``db.execute(query)`` and handles every
    artist id at most once per run.  For each artist the page at
    ``artist['url']`` is fetched and used to determine whichever of country,
    type, gender, begin date and end date are still empty on the row.  When
    at least one field was determined, the artist is edited through
    ``mb.edit_artist`` with an edit note listing the reasons for each field.

    Whether or not an edit was made, a row for the artist is inserted into
    ``bot_wp_artist_data``.

    ``wp_lang`` is not assigned in this function; it is read from the
    enclosing (module) scope.
    """
    seen = set()
    for artist in db.execute(query):
        # Skip artist ids that were already handled earlier in this run.
        if artist['id'] in seen:
            continue
        seen.add(artist['id'])
        colored_out(
            bcolors.OKBLUE,
            'Looking up artist "%s" http://musicbrainz.org/artist/%s'
            % (artist['name'], artist['gid']))
        out(' * wiki:', artist['url'])

        # Work on a dict copy so the determined values can be written into it.
        artist = dict(artist)
        update = set()  # names of the fields filled in for this artist
        reasons = []  # (label, reasons) pairs, in order, for the edit note

        page = WikiPage.fetch(artist['url'])

        if not artist['country']:
            country, country_reasons = determine_country(page)
            if country:
                artist['country'] = country_ids[country]
                update.add('country')
                reasons.append(('COUNTRY', country_reasons))

        if not artist['type']:
            # Named artist_type so the builtin ``type`` is not shadowed.
            artist_type, type_reasons = determine_type(page)
            if artist_type:
                artist['type'] = artist_type_ids[artist_type]
                update.add('type')
                reasons.append(('TYPE', type_reasons))

        # Runs after the type lookup on purpose: a type determined just above
        # already counts here.
        # NOTE(review): type id 1 is presumably "Person" - confirm against
        # artist_type_ids.
        if not artist['gender'] and artist['type'] == 1:
            gender, gender_reasons = determine_gender(page)
            if gender:
                artist['gender'] = gender_ids[gender]
                update.add('gender')
                reasons.append(('GENDER', gender_reasons))

        is_performance_name = False
        if artist['type'] == 1 and CHECK_PERFORMANCE_NAME:
            is_performance_name = db.execute(
                performance_name_query, artist['id']).scalar() > 0
            out(" * checking for performance name", is_performance_name)

        if not artist['begin_date_year']:
            begin_date, begin_date_reasons = determine_begin_date(
                artist, page, is_performance_name)
            if begin_date['year']:
                colored_out(bcolors.OKGREEN, " * new begin date:", begin_date)
                artist['begin_date_year'] = begin_date['year']
                artist['begin_date_month'] = begin_date['month']
                artist['begin_date_day'] = begin_date['day']
                update.add('begin_date')
                reasons.append(('BEGIN DATE', begin_date_reasons))
        if not artist['end_date_year']:
            end_date, end_date_reasons = determine_end_date(
                artist, page, is_performance_name)
            if end_date['year']:
                colored_out(bcolors.OKGREEN, " * new end date:", end_date)
                artist['end_date_year'] = end_date['year']
                artist['end_date_month'] = end_date['month']
                artist['end_date_day'] = end_date['day']
                update.add('end_date')
                reasons.append(('END DATE', end_date_reasons))

        if update:
            # Assemble the note in one join instead of repeated string +=.
            note_parts = ['From %s' % (artist['url'],)]
            note_parts.extend('\n\n%s:\n%s' % (field, ' '.join(reason))
                              for field, reason in reasons)
            edit_note = ''.join(note_parts)
            out(' * edit note:', edit_note.replace('\n', ' '))
            # NOTE(review): fixed 10 s pause before submitting the edit,
            # presumably to throttle requests - confirm.
            time.sleep(10)
            mb.edit_artist(artist, update, edit_note)

        db.execute(
            "INSERT INTO bot_wp_artist_data (gid, lang) VALUES (%s, %s)",
            (artist['gid'], wp_lang))
        out()