if mangled_rel_artist in page: found_artists.append(rel_artist) if (found_artists): reasons.append(join_names('related artist', found_artists)) out(' * has related artists: %s, found related artists: %s' % (len(artists), len(found_artists))) # Determine if artist matches if not found_albums and not found_works and not found_artists and not found_urls: continue # Check if wikipedia lang is compatible with artist country if wp_lang != 'en' or wp_lang in acceptable_countries_for_lang: if wp_lang not in acceptable_countries_for_lang: continue country, country_reasons = determine_country(wikipage) if (country not in acceptable_countries_for_lang[wp_lang]): colored_out( bcolors.HEADER, ' * artist country (%s) not compatible with wiki language (%s)' % (country, wp_lang)) continue wp_url = 'http://%s.wikipedia.org/wiki/%s' % ( wp_lang, quote_page_title(page_title), ) wd_url = 'http://www.wikidata.org/wiki/%s' % wikipage.wikidata_id.upper( ) text = 'Wikidata identifier found from matching Wikipedia page %s. The page mentions %s.' % ( wp_url, ', '.join(reasons))
def main():
    """Fill in missing artist fields from each artist's linked wiki page.

    Iterates over the rows returned by ``db.execute(query)``, handling every
    artist id only once. For each artist the linked page is fetched and the
    fields that are empty on the row (area, type, gender, begin date, end
    date) are looked up with the ``determine_*`` helpers. If anything was
    found, an edit is submitted through ``mb.edit_artist`` together with a
    note listing the reasons. Afterwards the artist is recorded in
    ``bot_wp_artist_data``: inserted when the row's ``processed`` value is
    ``None``, otherwise its ``processed`` column is set to ``now()``.

    NOTE(review): ``wp_lang`` is not bound inside this function; presumably
    it is a module-level global -- confirm.
    """
    visited_ids = set()
    for row in db.execute(query):
        # The query may yield the same artist several times; handle it once.
        if row['id'] in visited_ids:
            continue
        visited_ids.add(row['id'])

        headline = 'Looking up artist "%s" http://musicbrainz.org/artist/%s' % (
            row['name'], row['gid'])
        colored_out(bcolors.OKBLUE, headline)
        out(' * wiki:', row['url'])

        # Work on a plain dict copy so fields can be overwritten below.
        artist = dict(row)
        changed_fields = set()
        evidence = []

        page = WikiPage.fetch(artist['url'], False)

        # Area: only looked up when the row has none.
        if not artist['area']:
            found_country, country_why = determine_country(page)
            if found_country:
                artist['area'] = country_ids[found_country]
                changed_fields.add('area')
                evidence.append(('COUNTRY', country_why))

        # Artist type: only looked up when the row has none.
        if not artist['type']:
            found_type, type_why = determine_type(page)
            if found_type:
                artist['type'] = artist_type_ids[found_type]
                changed_fields.add('type')
                evidence.append(('TYPE', type_why))

        # Gender is only determined for artists of type id 1
        # (presumably the "person" type -- confirm against artist_type_ids).
        if artist['type'] == 1 and not artist['gender']:
            found_gender, gender_why = determine_gender(page)
            if found_gender:
                artist['gender'] = gender_ids[found_gender]
                changed_fields.add('gender')
                evidence.append(('GENDER', gender_why))

        # Optional extra check whose result is passed to the date helpers.
        is_performance_name = False
        if artist['type'] == 1 and CHECK_PERFORMANCE_NAME:
            match_count = db.execute(performance_name_query, artist['id']).scalar()
            is_performance_name = match_count > 0
            out(" * checking for performance name", is_performance_name)

        if not artist['begin_date_year']:
            begin_date, begin_why = determine_begin_date(
                artist, page, is_performance_name)
            if begin_date['year']:
                colored_out(bcolors.OKGREEN, " * new begin date:", begin_date)
                artist['begin_date_year'] = begin_date['year']
                artist['begin_date_month'] = begin_date['month']
                artist['begin_date_day'] = begin_date['day']
                changed_fields.add('begin_date')
                evidence.append(('BEGIN DATE', begin_why))

        if not artist['end_date_year']:
            end_date, end_why = determine_end_date(
                artist, page, is_performance_name)
            if end_date['year']:
                colored_out(bcolors.OKGREEN, " * new end date:", end_date)
                artist['end_date_year'] = end_date['year']
                artist['end_date_month'] = end_date['month']
                artist['end_date_day'] = end_date['day']
                changed_fields.add('end_date')
                evidence.append(('END DATE', end_why))

        if changed_fields:
            # One header line, then one paragraph per updated field.
            note_parts = ['From %s' % (artist['url'],)]
            note_parts.extend(
                '\n\n%s:\n%s' % (field, ' '.join(reason))
                for field, reason in evidence
            )
            edit_note = ''.join(note_parts)
            out(' * edit note:', edit_note.replace('\n', ' '))
            # Pause before every edit submission.
            time.sleep(10)
            mb.edit_artist(artist, changed_fields, edit_note)

        # Record the artist as handled, whether or not an edit was made.
        if artist['processed'] is not None:
            db.execute(
                "UPDATE bot_wp_artist_data SET processed = now() WHERE (gid, lang) = (%s, %s)",
                (artist['gid'], wp_lang))
        else:
            db.execute(
                "INSERT INTO bot_wp_artist_data (gid, lang) VALUES (%s, %s)",
                (artist['gid'], wp_lang))
mangled_rel_artist = mangle_name(rel_artist) if mangled_rel_artist in page: found_artists.append(rel_artist) if (found_artists): reasons.append(join_names('related artist', found_artists)) out(' * has related artists: %s, found related artists: %s' % (len(artists), len(found_artists))) # Determine if artist matches if not found_albums and not found_works and not found_artists and not found_urls: continue # Check if wikipedia lang is compatible with artist country if wp_lang != 'en' or wp_lang in acceptable_countries_for_lang: if wp_lang not in acceptable_countries_for_lang: continue country, country_reasons = determine_country(wikipage) if (country not in acceptable_countries_for_lang[wp_lang]): colored_out(bcolors.HEADER, ' * artist country (%s) not compatible with wiki language (%s)' % (country, wp_lang)) continue wp_url = 'https://%s.wikipedia.org/wiki/%s' % (wp_lang, quote_page_title(page_title),) wd_url = 'https://www.wikidata.org/wiki/%s' % wikipage.wikidata_id.upper() text = 'Wikidata identifier found from matching Wikipedia page %s. The page mentions %s.' % (wp_url, ', '.join(reasons)) colored_out(bcolors.OKGREEN, ' * linking to %s' % (wd_url,)) out(' * edit note: %s' % (text,)) time.sleep(60) mb.add_url("artist", artist['gid'], 352, wd_url, text) break if artist['processed'] is None: db.execute("INSERT INTO bot_wp_artist_link (gid, lang) VALUES (%s, %s)", (artist['gid'], wp_lang))
def main():
    """Fill in missing artist fields from each artist's linked wiki page.

    Walks the rows returned by ``db.execute(query)`` (each artist id is
    handled once), fetches the linked page and tries to determine country,
    type, gender, begin date and end date for whichever of those are empty on
    the row. When at least one field was found, the change is submitted
    through ``mb.edit_artist`` with a note listing the reasons. Every handled
    artist is then inserted into ``bot_wp_artist_data``.

    NOTE(review): ``wp_lang`` is not bound inside this function; presumably
    it is a module-level global -- confirm.
    """
    already_seen = set()
    for record in db.execute(query):
        record_id = record['id']
        # Skip repeated rows for the same artist.
        if record_id in already_seen:
            continue
        already_seen.add(record_id)

        banner = 'Looking up artist "%s" http://musicbrainz.org/artist/%s' % (
            record['name'], record['gid'])
        colored_out(bcolors.OKBLUE, banner)
        out(' * wiki:', record['url'])

        # Mutable copy of the row; updated in place as values are found.
        artist = dict(record)
        fields_to_update = set()
        justifications = []

        page = WikiPage.fetch(artist['url'])

        if not artist['country']:
            guessed_country, country_notes = determine_country(page)
            if guessed_country:
                artist['country'] = country_ids[guessed_country]
                fields_to_update.add('country')
                justifications.append(('COUNTRY', country_notes))

        if not artist['type']:
            guessed_type, type_notes = determine_type(page)
            if guessed_type:
                artist['type'] = artist_type_ids[guessed_type]
                fields_to_update.add('type')
                justifications.append(('TYPE', type_notes))

        # Gender lookup is restricted to artists of type id 1
        # (presumably the "person" type -- confirm against artist_type_ids).
        if artist['type'] == 1 and not artist['gender']:
            guessed_gender, gender_notes = determine_gender(page)
            if guessed_gender:
                artist['gender'] = gender_ids[guessed_gender]
                fields_to_update.add('gender')
                justifications.append(('GENDER', gender_notes))

        # Optional extra check whose result is passed to the date helpers.
        is_performance_name = False
        if artist['type'] == 1 and CHECK_PERFORMANCE_NAME:
            hits = db.execute(performance_name_query, artist['id']).scalar()
            is_performance_name = hits > 0
            out(" * checking for performance name", is_performance_name)

        if not artist['begin_date_year']:
            begin_date, begin_notes = determine_begin_date(
                artist, page, is_performance_name)
            if begin_date['year']:
                colored_out(bcolors.OKGREEN, " * new begin date:", begin_date)
                artist['begin_date_year'] = begin_date['year']
                artist['begin_date_month'] = begin_date['month']
                artist['begin_date_day'] = begin_date['day']
                fields_to_update.add('begin_date')
                justifications.append(('BEGIN DATE', begin_notes))

        if not artist['end_date_year']:
            end_date, end_notes = determine_end_date(
                artist, page, is_performance_name)
            if end_date['year']:
                colored_out(bcolors.OKGREEN, " * new end date:", end_date)
                artist['end_date_year'] = end_date['year']
                artist['end_date_month'] = end_date['month']
                artist['end_date_day'] = end_date['day']
                fields_to_update.add('end_date')
                justifications.append(('END DATE', end_notes))

        if fields_to_update:
            # Header line followed by one paragraph per updated field.
            sections = [
                '\n\n%s:\n%s' % (field, ' '.join(reason))
                for field, reason in justifications
            ]
            edit_note = 'From %s' % (artist['url'],) + ''.join(sections)
            out(' * edit note:', edit_note.replace('\n', ' '))
            # Pause before every edit submission.
            time.sleep(10)
            mb.edit_artist(artist, fields_to_update, edit_note)

        # Record the artist as handled, whether or not an edit was made.
        db.execute(
            "INSERT INTO bot_wp_artist_data (gid, lang) VALUES (%s, %s)",
            (artist['gid'], wp_lang))
        out()