示例#1
0
def push_data():
    mb = MusicBrainzClient(cfg.MB_USERNAME, cfg.MB_PASSWORD, cfg.MB_SITE)
    for row in aClient.get_mb_data():
        gid, workid, iswc, artist, work = row
        note = 'Data taken from ' + aClient.search_url(str(workid), 'i')
        mb.edit_work(gid, {'iswc': iswc}, note)
        print artist + ' work: ' + work + ' Done!'
示例#2
0
def push_data():
    mb = MusicBrainzClient(cfg.MB_USERNAME, cfg.MB_PASSWORD, cfg.MB_SITE)
    for row in aClient.get_mb_data():
        gid, workid, iswc, artist, work = row
        note = 'Data taken from ' + aClient.search_url(str(workid), 'i')
        mb.edit_work(gid, {'iswc': iswc}, note)
        print artist + ' work: ' + work + ' Done!'
示例#3
0
def main(args):
    if not args:
        out('Usage:   cancel_edits.py <edit_number edit_note>...\n')
        out('Example: cancel_edits.py "Edit #123 my mistake"')
        out('         cancel_edits.py 123 124 125')
        return

    edits = []
    for arg in args:
        if not isinstance(arg, unicode):
            arg = unicode(arg, locale.getpreferredencoding())
        m = re.match(ur'(?:[Ee]dit )?#?([0-9]+) ?(.*)$', arg)
        if not m:
            out('invalid edit number "%s", aborting!' % arg)
            return
        edit_nr = str(m.group(1))
        edit_note = m.group(2).lstrip()
        edits.append((edit_nr, edit_note))

    mb = MusicBrainzClient(cfg.MB_USERNAME, cfg.MB_PASSWORD, cfg.MB_SITE)
    for edit_nr, edit_note in edits:
        out(u'Cancel edit #%s: %s' % (edit_nr, edit_note if edit_note else u'<no edit note>'))
        mb.cancel_edit(str(edit_nr), edit_note)
示例#4
0
def main(args):
    if not args:
        out('Usage:   cancel_edits.py <edit_number edit_note>...\n')
        out('Example: cancel_edits.py "Edit #123 my mistake"')
        out('         cancel_edits.py 123 124 125')
        return

    edits = []
    for arg in args:
        if not isinstance(arg, unicode):
            arg = unicode(arg, locale.getpreferredencoding())
        m = re.match(ur'(?:[Ee]dit )?#?([0-9]+) ?(.*)$', arg)
        if not m:
            out('invalid edit number "%s", aborting!' % arg)
            return
        edit_nr = str(m.group(1))
        edit_note = m.group(2).lstrip()
        edits.append((edit_nr, edit_note))

    mb = MusicBrainzClient(cfg.MB_USERNAME, cfg.MB_PASSWORD, cfg.MB_SITE)
    for edit_nr, edit_note in edits:
        out(u'Cancel edit #%s: %s' % (edit_nr, edit_note if edit_note else u'<no edit note>'))
        mb.cancel_edit(str(edit_nr), edit_note)
#!/usr/bin/python

import re
import sqlalchemy
from editing import MusicBrainzClient
import time
from utils import out, colored_out, bcolors
import config as cfg

# Connect to the database named by cfg.MB_DB and set the PostgreSQL
# search_path to the musicbrainz schema followed by the bot's own schema.
# The schema name is interpolated into the statement, so cfg.BOT_SCHEMA_DB
# must come from trusted configuration.
engine = sqlalchemy.create_engine(cfg.MB_DB)
db = engine.connect()
db.execute("SET search_path TO musicbrainz, %s" % cfg.BOT_SCHEMA_DB)

# Editing client built from the configured bot account and target site.
mb = MusicBrainzClient(cfg.MB_USERNAME, cfg.MB_PASSWORD, cfg.MB_SITE)

query = """
    SELECT DISTINCT r.id, r.gid AS r_gid, w.gid AS w_gid, r.name, r.comment, lrw.id AS rel_id, lt.id AS link_type, r.artist_credit
    FROM recording r
        JOIN l_recording_work lrw ON lrw.entity0 = r.id
        JOIN link l ON l.id = lrw.link
        JOIN link_type lt ON l.link_type = lt.id
        JOIN link_attribute la ON la.link = l.id
        JOIN link_attribute_type lat ON la.attribute_type = lat.id AND lat.name = 'live'
        JOIN work w ON lrw.entity1 = w.id
    WHERE r.comment ~ E'live, \\\\d{4}(-\\\\d{2})?(-\\\\d{2})?:'
        AND l.begin_date_year IS NULL
        AND l.end_date_year IS NULL
        AND lt.name = 'performance'
        AND r.edits_pending = 0 AND lrw.edits_pending = 0
        /* Only one linked work */
        AND NOT EXISTS (SELECT 1 FROM l_recording_work lrw2 WHERE lrw2.entity0 = r.id AND lrw2.entity1 <> lrw.entity1)
import re
import sqlalchemy
import solr
from editing import MusicBrainzClient
from mbbot.source.secondhandsongs import SHSWebService
import pprint
import urllib
import time
from utils import mangle_name, join_names, out, colored_out, bcolors
import config as cfg

# Connect to the database named by cfg.MB_DB and set the PostgreSQL
# search_path to the musicbrainz schema followed by the bot's own schema.
# The schema name is interpolated into the statement, so cfg.BOT_SCHEMA_DB
# must come from trusted configuration.
engine = sqlalchemy.create_engine(cfg.MB_DB)
db = engine.connect()
db.execute("SET search_path TO musicbrainz, %s" % cfg.BOT_SCHEMA_DB)

# Editing client built from the configured bot account and target site.
mb = MusicBrainzClient(cfg.MB_USERNAME, cfg.MB_PASSWORD, cfg.MB_SITE)
# SecondHandSongs web-service wrapper, constructed with its defaults.
shs = SHSWebService()

"""
CREATE TABLE bot_shs_work_lang (
    work uuid NOT NULL,
    processed timestamp with time zone DEFAULT now(),
    CONSTRAINT bot_shs_work_lang_pkey PRIMARY KEY (work)
);
"""

query = """
WITH
    works_wo_lang AS (
        SELECT w.id AS work_id, u.url AS shs_url
        FROM work w
示例#7
0
from mbbot.wp.analysis import determine_country
from utils import mangle_name, join_names, out, colored_out, bcolors, escape_query, quote_page_title, wp_is_canonical_page
import config as cfg

# Connect to the database named by cfg.MB_DB and set the PostgreSQL
# search_path to the musicbrainz schema followed by the bot's own schema.
# The schema name is interpolated into the statement, so cfg.BOT_SCHEMA_DB
# must come from trusted configuration.
engine = sqlalchemy.create_engine(cfg.MB_DB)
db = engine.connect()
db.execute("SET search_path TO musicbrainz, %s" % cfg.BOT_SCHEMA_DB)

# Wikipedia language edition: first command-line argument, default English.
wp_lang = sys.argv[1] if len(sys.argv) > 1 else 'en'

# Use HTTPS for the MediaWiki API rather than depending on a redirect from
# plain HTTP.
wp = MediaWiki('https://%s.wikipedia.org/w/api.php' % wp_lang)

# Local Solr core: "wikipedia" for English, "wikipedia_<lang>" otherwise.
suffix = '_' + wp_lang if wp_lang != 'en' else ''
wps = solr.SolrConnection('http://localhost:8983/solr/wikipedia' + suffix)

# Editing client built from the configured bot account and target site.
mb = MusicBrainzClient(cfg.MB_USERNAME, cfg.MB_PASSWORD, cfg.MB_SITE)
# Schema reference for the bot's bookkeeping tables. This bare string is never
# executed by the script.
"""
CREATE TABLE bot_wp_artist_link (
    gid uuid NOT NULL,
    lang character varying(2),
    processed timestamp with time zone DEFAULT now(),
    CONSTRAINT bot_wp_artist_link_pkey PRIMARY KEY (gid, lang)
);

CREATE TABLE bot_wp_artist_link_ignore (
    gid uuid NOT NULL,
    lang character varying(2),
    CONSTRAINT bot_wp_artist_link_ignore_pkey PRIMARY KEY (gid, lang)
);
"""
from mbbot.wp.analysis import determine_country
from utils import mangle_name, join_names, out, colored_out, bcolors, escape_query, quote_page_title, wp_is_canonical_page
import config as cfg

# Connect to the database named by cfg.MB_DB and set the PostgreSQL
# search_path to the musicbrainz schema followed by the bot's own schema.
# The schema name is interpolated into the statement, so cfg.BOT_SCHEMA_DB
# must come from trusted configuration.
engine = sqlalchemy.create_engine(cfg.MB_DB)
db = engine.connect()
db.execute("SET search_path TO musicbrainz, %s" % cfg.BOT_SCHEMA_DB)

# Wikipedia language edition: first command-line argument, default English.
wp_lang = sys.argv[1] if len(sys.argv) > 1 else 'en'

# MediaWiki API endpoint of the selected Wikipedia edition.
wp = MediaWiki('https://%s.wikipedia.org/w/api.php' % wp_lang)

# Local Solr core: "wikipedia" for English, "wikipedia_<lang>" otherwise.
suffix = '_' + wp_lang if wp_lang != 'en' else ''
wps = solr.SolrConnection('http://localhost:8983/solr/wikipedia' + suffix)

# Editing client built from the configured bot account and target site.
mb = MusicBrainzClient(cfg.MB_USERNAME, cfg.MB_PASSWORD, cfg.MB_SITE)

# Schema reference for the bot's bookkeeping tables. This bare string is never
# executed by the script.
"""
CREATE TABLE bot_wp_artist_link (
    gid uuid NOT NULL,
    lang character varying(2),
    processed timestamp with time zone DEFAULT now(),
    CONSTRAINT bot_wp_artist_link_pkey PRIMARY KEY (gid, lang)
);

CREATE TABLE bot_wp_artist_link_ignore (
    gid uuid NOT NULL,
    lang character varying(2),
    CONSTRAINT bot_wp_artist_link_ignore_pkey PRIMARY KEY (gid, lang)
);
"""
示例#9
0
import re
import sqlalchemy
import solr
from editing import MusicBrainzClient
import pprint
import urllib
import time
from utils import mangle_name, join_names, out, colored_out, bcolors
import config as cfg

# Connect to the database named by cfg.MB_DB and restrict the PostgreSQL
# search_path to the musicbrainz schema.
engine = sqlalchemy.create_engine(cfg.MB_DB)
db = engine.connect()
db.execute("SET search_path TO musicbrainz")

# Editing client built from the configured bot account and target site.
mb = MusicBrainzClient(cfg.MB_USERNAME, cfg.MB_PASSWORD, cfg.MB_SITE)
"""
CREATE TABLE bot_encyclopedisque_medium_format (
    gid uuid NOT NULL,
    processed timestamp with time zone DEFAULT now()
);

ALTER TABLE ONLY bot_encyclopedisque_medium_format
    ADD CONSTRAINT bot_encyclopedisque_medium_format_pkey PRIMARY KEY (gid);

"""

query = """
WITH
    releases_wo_7inch AS (
        SELECT r.id, u.url, m.format
示例#10
0
from mbbot.wp.analysis import determine_country
from utils import mangle_name, join_names, out, colored_out, bcolors, escape_query, quote_page_title, wp_is_canonical_page
import config as cfg

# Connect to the database named by cfg.MB_DB and set the PostgreSQL
# search_path to the musicbrainz schema followed by the bot's own schema.
# The schema name is interpolated into the statement, so cfg.BOT_SCHEMA_DB
# must come from trusted configuration.
engine = sqlalchemy.create_engine(cfg.MB_DB)
db = engine.connect()
db.execute("SET search_path TO musicbrainz, %s" % cfg.BOT_SCHEMA_DB)

# Wikipedia language edition: first command-line argument, default English.
wp_lang = sys.argv[1] if len(sys.argv) > 1 else 'en'

# Use HTTPS for the MediaWiki API rather than depending on a redirect from
# plain HTTP.
wp = MediaWiki('https://%s.wikipedia.org/w/api.php' % wp_lang)

# Local Solr core: "wikipedia" for English, "wikipedia_<lang>" otherwise.
suffix = '_' + wp_lang if wp_lang != 'en' else ''
wps = solr.SolrConnection('http://localhost:8983/solr/wikipedia' + suffix)

# Editing client built from the configured bot account and target site.
mb = MusicBrainzClient(cfg.MB_USERNAME, cfg.MB_PASSWORD, cfg.MB_SITE)

# Schema reference for the bot's bookkeeping tables. This bare string is never
# executed by the script.
"""
CREATE TABLE bot_wp_artist_link (
    gid uuid NOT NULL,
    lang character varying(2),
    processed timestamp with time zone DEFAULT now(),
    CONSTRAINT bot_wp_artist_link_pkey PRIMARY KEY (gid, lang)
);

CREATE TABLE bot_wp_artist_link_ignore (
    gid uuid NOT NULL,
    lang character varying(2),
    CONSTRAINT bot_wp_artist_link_ignore_pkey PRIMARY KEY (gid, lang)
);
"""
示例#11
0
from mbbot.wp.wikipage import WikiPage
from utils import mangle_name, join_names, out, colored_out, bcolors, escape_query, quote_page_title, wp_is_canonical_page
import config as cfg

# Connect to the database named by cfg.MB_DB and set the PostgreSQL
# search_path to the musicbrainz schema followed by the bot's own schema.
# The schema name is interpolated into the statement, so cfg.BOT_SCHEMA_DB
# must come from trusted configuration.
engine = sqlalchemy.create_engine(cfg.MB_DB)
db = engine.connect()
db.execute("SET search_path TO musicbrainz, %s" % cfg.BOT_SCHEMA_DB)

# Wikipedia language edition: first command-line argument, default English.
wp_lang = sys.argv[1] if len(sys.argv) > 1 else 'en'

# MediaWiki API endpoint of the selected Wikipedia edition.
wp = MediaWiki('https://%s.wikipedia.org/w/api.php' % wp_lang)

# Local Solr core: "wikipedia" for English, "wikipedia_<lang>" otherwise.
suffix = '_' + wp_lang if wp_lang != 'en' else ''
wps = solr.SolrConnection('http://localhost:8983/solr/wikipedia' + suffix)

# Editing client built from the configured bot account and target site.
mb = MusicBrainzClient(cfg.MB_USERNAME, cfg.MB_PASSWORD, cfg.MB_SITE)

"""
CREATE TABLE bot_wp_rg_link (
    gid uuid NOT NULL,
    lang character varying(2),
    processed timestamp with time zone DEFAULT now(),
    CONSTRAINT bot_wp_rg_link_pkey PRIMARY KEY (gid, lang)
);
"""


# Country codes keyed by Wikipedia language code.
# NOTE: the 'en' entry is the very same list object as the 'fr' entry, so an
# in-place change to one is visible through the other.
acceptable_countries_for_lang = {
    'fr': ['FR', 'MC']
}
acceptable_countries_for_lang['en'] = acceptable_countries_for_lang['fr']
示例#12
0
 def open(self, mb=False, do=False, client=False):
     """Open the requested connections.

     mb: when true, connect ``self.mbengine`` and store the result in ``self.mbdb``.
     do: when true, connect ``self.doengine`` and store the result in ``self.dodb``.
     client: when true, return a new MusicBrainzClient built from the
         configured credentials; otherwise return None.
     """
     if mb:
         self.mbdb = self.mbengine.connect()
     if do:
         self.dodb = self.doengine.connect()
     if not client:
         return None
     return MusicBrainzClient(cfg.MB_USERNAME, cfg.MB_PASSWORD, cfg.MB_SITE)
from simplemediawiki import MediaWiki
from editing import MusicBrainzClient
import pprint
import urllib
import time
from utils import mangle_name, join_names, mw_remove_markup, out
import config as cfg

# Connect to the database named by cfg.MB_DB and restrict the PostgreSQL
# search_path to the musicbrainz schema.
engine = sqlalchemy.create_engine(cfg.MB_DB)
db = engine.connect()
db.execute("SET search_path TO musicbrainz")

# English Wikipedia API over HTTPS (rather than depending on a redirect from
# plain HTTP), and the local Solr core holding the Wikipedia index.
wp = MediaWiki('https://en.wikipedia.org/w/api.php')
wps = solr.SolrConnection('http://localhost:8983/solr/wikipedia')

# Editing client built from the configured bot account and target site.
mb = MusicBrainzClient(cfg.MB_USERNAME, cfg.MB_PASSWORD, cfg.MB_SITE)

query = """
SELECT DISTINCT
    a.id, a.gid, a.name, a.country, a.type, a.gender,
    a.begin_date_year,
    a.begin_date_month,
    a.begin_date_day,
    a.end_date_year,
    a.end_date_month,
    a.end_date_day,
    u.url
FROM s_artist a
JOIN l_artist_url l ON l.entity0 = a.id AND l.link IN (SELECT id FROM link WHERE link_type = 179)
JOIN url u ON u.id = l.entity1
LEFT JOIN bot_wp_artist_data b ON a.gid = b.gid
示例#14
0
from simplemediawiki import MediaWiki
from editing import MusicBrainzClient
import pprint
import urllib
import time
from utils import mangle_name, join_names, out
import config as cfg

# Connect to the database named by cfg.MB_DB and restrict the PostgreSQL
# search_path to the musicbrainz schema.
engine = sqlalchemy.create_engine(cfg.MB_DB)
db = engine.connect()
db.execute("SET search_path TO musicbrainz")

# English Wikipedia API over HTTPS (rather than depending on a redirect from
# plain HTTP), and the local Solr core holding the Wikipedia index.
wp = MediaWiki('https://en.wikipedia.org/w/api.php')
wps = solr.SolrConnection('http://localhost:8983/solr/wikipedia')

# Editing client built from the configured bot account and target site.
mb = MusicBrainzClient(cfg.MB_USERNAME, cfg.MB_PASSWORD, cfg.MB_SITE)

"""
CREATE TABLE bot_wp_rg (
    gid uuid NOT NULL,
    processed timestamp with time zone DEFAULT now()
);

ALTER TABLE ONLY bot_wp_rg
    ADD CONSTRAINT bot_wp_rg_pkey PRIMARY KEY (gid);
"""

query = """
WITH
    rgs_wo_wikipedia AS (
        SELECT a.id
示例#15
0
import re
import sys
import urllib
import urllib2
import config
import pymongo
import pprint
from editing import MusicBrainzClient
import cgi


# NOTE(review): account name, password and site are hard-coded here instead of
# being read from the imported config module -- confirm this is intentional.
mb = MusicBrainzClient('lukz_bot', 'mb', 'http://mb.muziq.eu')


# Shared urllib2 opener; its header list is replaced with the configured
# User-Agent when config.WWW_USER_AGENT is set.
opener = urllib2.build_opener()
if config.WWW_USER_AGENT:
    opener.addheaders = [('User-Agent', config.WWW_USER_AGENT)]


# MongoDB connection created with default arguments; `db` is its "mbot"
# database.
mongo = pymongo.Connection()
db = mongo.mbot


# Characters mapped to their HTML entity replacements.
html_escape_table = {
    "&": "&amp;",
    '"': "&quot;",
    "'": "&apos;",
    ">": "&gt;",
    "<": "&lt;",
    }
示例#16
0
        m = re.search(ur'<span property="v:identifier">(.+?)</span>', page)
        identifier = m.group(1).upper() if m else None
        return identifier if identifier and isrc_valid(identifier) else None

# Releases whose barcode matches the regular expression supplied as the single
# bound parameter (`~` is the PostgreSQL regex-match operator).
query_releases = '''
SELECT DISTINCT r.id, r.gid, r.barcode
FROM release r
WHERE r.barcode ~ %s
'''

# Connect to the database named by cfg.MB_DB and restrict the PostgreSQL
# search_path to the musicbrainz schema.
engine = sqlalchemy.create_engine(cfg.MB_DB)
db = engine.connect()
db.execute('SET search_path TO musicbrainz')

# Zero-inch source wrapper plus two MusicBrainz clients (editing client and
# web-service client), both built from the same configured account and site.
zeroinch = ZeroInch()
mb = MusicBrainzClient(cfg.MB_USERNAME, cfg.MB_PASSWORD, cfg.MB_SITE)
ws = MusicBrainzWebservice(cfg.MB_USERNAME, cfg.MB_PASSWORD, cfg.MB_SITE)


def identify_isrc_edit(isrcs):
    """Build a predicate that recognises the edit listing exactly *isrcs*.

    The returned callable takes ``(edit_nr, text)`` and returns True when the
    set of ISRCs linked in *text* (anchors pointing at ``<MB_SITE>/isrc/<12
    upper-case alphanumerics>``) equals the set of *isrcs*. ``edit_nr`` is
    accepted but not used.
    """
    # Snapshot once so a one-shot iterable still works on repeated calls.
    expected = set(isrcs)
    # Escape the site URL: its dots must match literally, not any character.
    link_re = re.compile(r'<a href="' + re.escape(cfg.MB_SITE) + r'/isrc/([A-Z0-9]{12})">')
    return lambda edit_nr, text: expected == set(link_re.findall(text))

# Load the URLs stored in each bot_isrc_zeroinch_* bookkeeping table into a
# set; `for url,` unpacks the one-column result rows.
isrc_submitted = set(url for url, in db.execute('''SELECT url FROM bot_isrc_zeroinch_submitted'''))
isrc_missing = set(url for url, in db.execute('''SELECT url FROM bot_isrc_zeroinch_missing'''))
isrc_problematic = set(url for url, in db.execute('''SELECT url FROM bot_isrc_zeroinch_problematic'''))

#for artists in [['Gui_Boratto']]:
#for artists in zeroinch.get_artists('/label/Warp_Records'):
for artists in zeroinch.get_artists('/catalogue', cipher='all', page='1'):
    for artist in artists:
        artist_url = u'http://www.zero-inch.com/artist/%s' % artist
import re
import sqlalchemy
import solr
from editing import MusicBrainzClient
import discogs_client as discogs
import pprint
import urllib
import time
from utils import mangle_name, join_names, out, colored_out, bcolors
import config as cfg

# Connect to the database named by cfg.MB_DB and set the PostgreSQL
# search_path to the musicbrainz schema followed by the bot's own schema.
# The schema name is interpolated into the statement, so cfg.BOT_SCHEMA_DB
# must come from trusted configuration.
engine = sqlalchemy.create_engine(cfg.MB_DB)
db = engine.connect()
db.execute("SET search_path TO musicbrainz, %s" % cfg.BOT_SCHEMA_DB)

# Editing client built from the configured bot account and target site.
mb = MusicBrainzClient(cfg.MB_USERNAME, cfg.MB_PASSWORD, cfg.MB_SITE)

# User-Agent string set on the discogs_client module.
discogs.user_agent = "MusicBrainzBot/0.1 +https://github.com/murdos/musicbrainz-bot"

"""
CREATE TABLE bot_discogs_medium_format (
    medium integer NOT NULL,
    processed timestamp with time zone DEFAULT now(),
    CONSTRAINT bot_discogs_medium_format_pkey PRIMARY KEY (medium)
);
"""

query = """
WITH
    mediums_with_fuzzy_format AS (
        SELECT r.id AS release_id, m.position, m.id AS medium_id, u.url AS discogs_url, m.format
import re
import sqlalchemy
import solr
from editing import MusicBrainzClient
import pprint
import urllib
import time
from utils import mangle_name, join_names, out, colored_out, bcolors
import config as cfg

# Connect to the database named by cfg.MB_DB and restrict the PostgreSQL
# search_path to the musicbrainz schema.
engine = sqlalchemy.create_engine(cfg.MB_DB)
db = engine.connect()
db.execute("SET search_path TO musicbrainz")

# Editing client built from the configured bot account and target site.
mb = MusicBrainzClient(cfg.MB_USERNAME, cfg.MB_PASSWORD, cfg.MB_SITE)

"""
CREATE TABLE bot_encyclopedisque_medium_format (
    gid uuid NOT NULL,
    processed timestamp with time zone DEFAULT now()
);

ALTER TABLE ONLY bot_encyclopedisque_medium_format
    ADD CONSTRAINT bot_encyclopedisque_medium_format_pkey PRIMARY KEY (gid);

"""

query = """
WITH
    releases_wo_7inch AS (
示例#19
0
def main(verbose=False):
    download_if_modified(bbc_sitemap_url, bbc_sitemap)

    db = db_connect()

    release_redirects = dict(get_release_redirects(db))
    release_groups = dict(get_release_groups(db))
    releases = dict(get_releases(db))
    bbc_reviews_set = set((gid, url) for gid, url in db.execute("""SELECT gid, url FROM bot_bbc_reviews_set"""))

    review_urls = defaultdict(set)
    for rg, url in get_review_urls(db):
        review_urls[rg].add(url)

    cleanup_review_urls = set()
    for cleanup_url in cleanup_urls:
        f = urllib.urlopen(cleanup_url)
        cleanup_review_urls |= set(re.findall(ur"http://www.bbc.co.uk/music/reviews/[0-9a-z]+", f.read()))

    editor_id = db.execute("""SELECT id FROM editor WHERE name = %s""", cfg.MB_USERNAME).first()[0]
    mb = MusicBrainzClient(cfg.MB_USERNAME, cfg.MB_PASSWORD, cfg.MB_SITE, editor_id=editor_id)

    normal_edits_left, edits_left = mb.edits_left()

    bbc_reviews = list(load_bbc_reviews(bbc_sitemap))
    count = len(bbc_reviews)
    for i, (review_url, release_url, title) in enumerate(bbc_reviews):
        if normal_edits_left <= 0:
            break
        if verbose:
            out(u"%d/%d - %.2f%%" % (i + 1, count, (i + 1) * 100.0 / count))
            out(u"%s %s" % (title, review_url))
            out(release_url)
        if review_url in cleanup_review_urls:
            continue
        release_gid = utils.extract_mbid(release_url, "release")
        row = release_redirects.get(release_gid)
        if not row:
            row = releases.get(release_gid)
        if not row:
            if verbose:
                out("  non-existant release in review %s" % review_url)
            continue
        rg, ac, release_name = row
        gid, name = release_groups[rg]
        if review_url in review_urls[rg]:
            continue
        if (gid, review_url) in bbc_reviews_set:
            if verbose:
                out(u"  already linked earlier (probably got removed by some editor!")
            continue
        mb_title = "%s - %s" % (artist_credit(db, ac), release_name)
        if not are_similar(title, mb_title):
            if verbose:
                out(u"  similarity too small: %s <-> %s" % (title, mb_title))
                # out(u'|-\n| [%s %s]\n| [[ReleaseGroup:%s|%s]]\n| [[Release:%s|%s]]' % (review_url, bbc_name, gid, name, release_gid, release_name))
            continue
        text = (
            u"Review is in BBC mapping [1], and review name “%s” is"
            " similar to the release name. If this is wrong,"
            " please note it here and put the correct mapping in"
            " the wiki [2].\n\n[1] %s\n[2] %s" % (title, bbc_sitemap_url, cleanup_urls[0])
        )
        text += "\n\n%s" % prog
        try:
            out(u"http://musicbrainz.org/release-group/%s  ->  %s" % (gid, review_url))
            mb.add_url("release_group", gid, 94, review_url, text, auto=False)
            db.execute("INSERT INTO bot_bbc_reviews_set (gid,url) VALUES (%s,%s)", (gid, review_url))
            bbc_reviews_set.add((gid, review_url))
            normal_edits_left -= 1
        except (urllib2.HTTPError, urllib2.URLError, socket.timeout) as e:
            out(e)
def init_mb():
    """Bind the module-level ``mb`` to a client built from the configured credentials."""
    global mb
    mb = MusicBrainzClient(
        cfg.MB_USERNAME,
        cfg.MB_PASSWORD,
        cfg.MB_SITE,
    )
示例#21
0
import re
import sqlalchemy
import solr
from editing import MusicBrainzClient
from mbbot.source.secondhandsongs import SHSWebService
import pprint
import urllib
import time
from utils import mangle_name, join_names, out, colored_out, bcolors
import config as cfg

# Connect to the database named by cfg.MB_DB and set the PostgreSQL
# search_path to the musicbrainz schema followed by the bot's own schema.
# The schema name is interpolated into the statement, so cfg.BOT_SCHEMA_DB
# must come from trusted configuration.
engine = sqlalchemy.create_engine(cfg.MB_DB)
db = engine.connect()
db.execute("SET search_path TO musicbrainz, %s" % cfg.BOT_SCHEMA_DB)

# Editing client built from the configured bot account and target site.
mb = MusicBrainzClient(cfg.MB_USERNAME, cfg.MB_PASSWORD, cfg.MB_SITE)
# SecondHandSongs web-service wrapper, constructed with its defaults.
shs = SHSWebService()
"""
CREATE TABLE bot_shs_work_lang (
    work uuid NOT NULL,
    processed timestamp with time zone DEFAULT now(),
    CONSTRAINT bot_shs_work_lang_pkey PRIMARY KEY (work)
);
"""

query = """
WITH
    works_wo_lang AS (
        SELECT w.id AS work_id, u.url AS shs_url
        FROM work w
            JOIN l_url_work l ON l.entity1 = w.id AND l.link IN (SELECT id FROM link WHERE link_type = 280)
示例#22
0
import re
import sys
import urllib
import urllib2
import config
import pymongo
import pprint
from editing import MusicBrainzClient
import cgi


# NOTE(review): account name, password and site are hard-coded here instead of
# being read from the imported config module -- confirm this is intentional.
mb = MusicBrainzClient('lukz_bot', 'mb', 'http://mb.muziq.eu')


# Shared urllib2 opener; its header list is replaced with the configured
# User-Agent when config.WWW_USER_AGENT is set.
opener = urllib2.build_opener()
if config.WWW_USER_AGENT:
    opener.addheaders = [('User-Agent', config.WWW_USER_AGENT)]


# MongoDB connection created with default arguments; `db` is its "mbot"
# database.
mongo = pymongo.Connection()
db = mongo.mbot


# Characters mapped to their HTML entity replacements.
html_escape_table = {
    "&": "&amp;",
    '"': "&quot;",
    "'": "&apos;",
    ">": "&gt;",
    "<": "&lt;",
    }
import re
import sqlalchemy
import solr
from editing import MusicBrainzClient
import discogs_client as discogs
import pprint
import urllib
import time
from utils import mangle_name, join_names, out, colored_out, bcolors
import config as cfg

# Connect to the database named by cfg.MB_DB and set the PostgreSQL
# search_path to the musicbrainz schema followed by the bot's own schema.
# The schema name is interpolated into the statement, so cfg.BOT_SCHEMA_DB
# must come from trusted configuration.
engine = sqlalchemy.create_engine(cfg.MB_DB)
db = engine.connect()
db.execute("SET search_path TO musicbrainz, %s" % cfg.BOT_SCHEMA_DB)

# Editing client built from the configured bot account and target site.
mb = MusicBrainzClient(cfg.MB_USERNAME, cfg.MB_PASSWORD, cfg.MB_SITE)

# User-Agent string set on the discogs_client module.
discogs.user_agent = 'MusicBrainzBot/0.1 +https://github.com/murdos/musicbrainz-bot'
"""
CREATE TABLE bot_discogs_release_packaging (
    release uuid NOT NULL,
    processed timestamp with time zone DEFAULT now(),
    CONSTRAINT bot_discogs_release_packaging_pkey PRIMARY KEY (release)
);
"""

query = """
WITH
    releases_wo_packaging AS (
        SELECT r.id AS release_id, u.url AS discogs_url
        FROM release r
示例#24
0
from simplemediawiki import MediaWiki
from editing import MusicBrainzClient
import pprint
import urllib
import time
from utils import mangle_name, join_names, quote_page_title
import config as cfg

# Connect to the database named by cfg.MB_DB and set the PostgreSQL
# search_path to the musicbrainz schema followed by the bot's own schema.
# The schema name is interpolated into the statement, so cfg.BOT_SCHEMA_DB
# must come from trusted configuration.
engine = sqlalchemy.create_engine(cfg.MB_DB)
db = engine.connect()
db.execute("SET search_path TO musicbrainz, %s" % cfg.BOT_SCHEMA_DB)

# English Wikipedia API endpoint and the local Solr core holding the
# Wikipedia index.
wp = MediaWiki('https://en.wikipedia.org/w/api.php')
wps = solr.SolrConnection('http://localhost:8983/solr/wikipedia')

# Editing client built from the configured bot account and target site.
mb = MusicBrainzClient(cfg.MB_USERNAME, cfg.MB_PASSWORD, cfg.MB_SITE)
"""
CREATE TABLE bot_wp_label (
    gid uuid NOT NULL,
    processed timestamp with time zone DEFAULT now()
);

ALTER TABLE ONLY bot_wp_label
    ADD CONSTRAINT bot_wp_label_pkey PRIMARY KEY (gid);

"""

query = """
WITH
    labels_wo_wikipedia AS (
        SELECT a.id
示例#25
0
        identifier = m.group(1).upper() if m else None
        return identifier if identifier and isrc_valid(identifier) else None


# Releases whose barcode matches the regular expression supplied as the single
# bound parameter (`~` is the PostgreSQL regex-match operator).
query_releases = '''
SELECT DISTINCT r.id, r.gid, r.barcode
FROM release r
WHERE r.barcode ~ %s
'''

# Connect to the database named by cfg.MB_DB and restrict the PostgreSQL
# search_path to the musicbrainz schema.
engine = sqlalchemy.create_engine(cfg.MB_DB)
db = engine.connect()
db.execute('SET search_path TO musicbrainz')

# Zero-inch source wrapper plus two MusicBrainz clients (editing client and
# web-service client), both built from the same configured account and site.
zeroinch = ZeroInch()
mb = MusicBrainzClient(cfg.MB_USERNAME, cfg.MB_PASSWORD, cfg.MB_SITE)
ws = MusicBrainzWebservice(cfg.MB_USERNAME, cfg.MB_PASSWORD, cfg.MB_SITE)


def identify_isrc_edit(isrcs):
    """Build a predicate that recognises the edit listing exactly *isrcs*.

    The returned callable takes ``(edit_nr, text)`` and returns True when the
    set of ISRCs linked in *text* (anchors pointing at ``<MB_SITE>/isrc/<12
    upper-case alphanumerics>``) equals the set of *isrcs*. ``edit_nr`` is
    accepted but not used.
    """
    # Snapshot once so a one-shot iterable still works on repeated calls.
    expected = set(isrcs)
    # Escape the site URL: its dots must match literally, not any character.
    link_re = re.compile(
        r'<a href="' + re.escape(cfg.MB_SITE) + r'/isrc/([A-Z0-9]{12})">')
    return lambda edit_nr, text: expected == set(link_re.findall(text))


# Load the URLs stored in the bot_isrc_zeroinch_submitted and
# bot_isrc_zeroinch_missing bookkeeping tables into sets; `for url,` unpacks
# the one-column result rows.
isrc_submitted = set(
    url
    for url, in db.execute('''SELECT url FROM bot_isrc_zeroinch_submitted'''))
isrc_missing = set(
    url
    for url, in db.execute('''SELECT url FROM bot_isrc_zeroinch_missing'''))
from editing import MusicBrainzClient
from mbbot.source.secondhandsongs import SHSWebService
from picard.similarity import similarity2
from kitchen.text.converters import to_unicode
import pprint
import urllib
import urllib2
import time
from utils import mangle_name, join_names, out, colored_out, bcolors
import config as cfg

# Connect to the database named by cfg.MB_DB and set the PostgreSQL
# search_path to the musicbrainz schema followed by the bot's own schema.
# The schema name is interpolated into the statement, so cfg.BOT_SCHEMA_DB
# must come from trusted configuration.
engine = sqlalchemy.create_engine(cfg.MB_DB)
db = engine.connect()
db.execute("SET search_path TO musicbrainz, %s" % cfg.BOT_SCHEMA_DB)

# Editing client built from the configured bot account and target site.
mb = MusicBrainzClient(cfg.MB_USERNAME, cfg.MB_PASSWORD, cfg.MB_SITE)
# SecondHandSongs web-service wrapper, constructed with its defaults.
shs = SHSWebService()

"""
CREATE TABLE mbbot.bot_shs_link_artist (
    artist uuid NOT NULL,
    processed timestamp with time zone DEFAULT now(),
    CONSTRAINT bot_shs_link_artist_pkey PRIMARY KEY (artist)
);
"""

query = """
WITH
    artists_wo_shs AS (
        SELECT DISTINCT a.id AS artist_id, a.gid AS artist_gid, w.id AS work_id, w.gid AS work_gid, u.url AS shs_url
        FROM artist a
import re
import random
import locale
from collections import defaultdict
import itertools
import sqlalchemy
import discogs_client as discogs
from editing import MusicBrainzClient
import Levenshtein
import config as cfg

# Connect to the database named by cfg.MB_DB and restrict the PostgreSQL
# search_path to the musicbrainz schema.
engine = sqlalchemy.create_engine(cfg.MB_DB)
db = engine.connect()
db.execute('SET search_path TO musicbrainz')

# Editing client built from the configured bot account and target site.
mb = MusicBrainzClient(cfg.MB_USERNAME, cfg.MB_PASSWORD, cfg.MB_SITE)

# User-Agent string set on the discogs_client module.
discogs.user_agent = 'MusicBrainzDiscogsReleaseGroupsBot/0.1 +https://github.com/weisslj/musicbrainz-bot'

query_rg_without_master = '''
SELECT rg.artist_credit, rg.id, rg.gid, release_name.name
FROM release_group rg
JOIN release_name ON rg.name = release_name.id
WHERE rg.id IN (
    SELECT DISTINCT rg.id
    FROM release_group rg
    JOIN release ON rg.id = release.release_group
    JOIN l_release_url l_ru ON release.id = l_ru.entity0
    JOIN link l ON l_ru.link = l.id
    WHERE l.link_type = 76
    
示例#28
0
);
CREATE TABLE bot_asin_catmismatch (
    gid uuid NOT NULL,
    processed timestamp with time zone DEFAULT now(),
    CONSTRAINT bot_asin_catmismatch_pkey PRIMARY KEY (gid)
);
'''

# Connect to the database named by cfg.MB_DB and set the PostgreSQL
# search_path to the musicbrainz schema followed by a schema literally named
# "mbbot" (hard-coded here, not taken from configuration).
engine = sqlalchemy.create_engine(cfg.MB_DB)
db = engine.connect()
db.execute("SET search_path TO musicbrainz, mbbot")

# Look up the bot account's editor id by name.
# NOTE(review): `.first()[0]` fails with a TypeError if no editor row matches
# cfg.MB_USERNAME -- confirm the account is guaranteed to exist.
editor_id = db.execute('''SELECT id FROM editor WHERE name = %s''',
                       cfg.MB_USERNAME).first()[0]
# Editing client built from the configured account and site, with the editor
# id passed along.
mb = MusicBrainzClient(cfg.MB_USERNAME,
                       cfg.MB_PASSWORD,
                       cfg.MB_SITE,
                       editor_id=editor_id)

store_map = [
    # http://www.amazon.com/gp/help/customer/display.html/ref=hp_left_cn?nodeId=527692
    ('us', ['US', 'AU']),
    # http://www.amazon.co.uk/gp/help/customer/display.html/ref=ssd?nodeId=1204872
    ('uk', ['GB', 'XE']),
    # http://www.amazon.de/gp/help/customer/display.html/ref=hp_left_sib?nodeId=13464781
    ('de', ['DE', 'AT', 'BE', 'LI', 'LU', 'NL', 'CH', 'XE']),
    # http://www.amazon.fr/gp/help/customer/display.html?nodeId=897502
    ('fr', ['FR', 'MC', 'BE', 'LU', 'CH', 'XE']),
    # http://www.amazon.co.jp/gp/help/customer/display.html/ref=hp_rel_topic?nodeId=1039606
    ('jp', ['JP']),
    # http://www.amazon.ca/gp/help/customer/display.html?nodeId=918742
    ('ca', ['CA']),
示例#29
0
from utils import mangle_name, join_names, out, get_page_content, extract_page_title, colored_out, bcolors, escape_query, quote_page_title, wp_is_canonical_page
from utils import mangle_name, join_names, out, get_page_content, extract_page_title, colored_out, bcolors, escape_query, quote_page_title
import config as cfg

# Connect to the database named by cfg.MB_DB and set the PostgreSQL
# search_path to the musicbrainz schema followed by the bot's own schema.
# The schema name is interpolated into the statement, so cfg.BOT_SCHEMA_DB
# must come from trusted configuration.
engine = sqlalchemy.create_engine(cfg.MB_DB)
db = engine.connect()
db.execute("SET search_path TO musicbrainz, %s" % cfg.BOT_SCHEMA_DB)

# Wikipedia language edition: first command-line argument, default English.
wp_lang = sys.argv[1] if len(sys.argv) > 1 else 'en'

# Use HTTPS for the MediaWiki API rather than depending on a redirect from
# plain HTTP.
wp = MediaWiki('https://%s.wikipedia.org/w/api.php' % wp_lang)

# Local Solr core: "wikipedia" for English, "wikipedia_<lang>" otherwise.
suffix = '_' + wp_lang if wp_lang != 'en' else ''
wps = solr.SolrConnection('http://localhost:8983/solr/wikipedia' + suffix)

# Editing client built from the configured bot account and target site.
mb = MusicBrainzClient(cfg.MB_USERNAME, cfg.MB_PASSWORD, cfg.MB_SITE)
"""
CREATE TABLE bot_wp_rg_link (
    gid uuid NOT NULL,
    lang character varying(2),
    processed timestamp with time zone DEFAULT now(),
    CONSTRAINT bot_wp_rg_link_pkey PRIMARY KEY (gid, lang)
);
"""

# Country codes keyed by Wikipedia language code.
# NOTE: the 'en' entry is the very same list object as the 'fr' entry, so an
# in-place change to one is visible through the other.
acceptable_countries_for_lang = {'fr': ['FR', 'MC']}
acceptable_countries_for_lang['en'] = acceptable_countries_for_lang['fr']

query_params = []
no_country_filter = (
    wp_lang == 'en') and ('en' not in acceptable_countries_for_lang
#!/usr/bin/python

import sys
import os
import re
import time
import urllib2
import json
from editing import MusicBrainzClient
from utils import out, colored_out, bcolors, monkeypatch_mechanize
import config as cfg

# Work around mechanize bug. See: https://github.com/jjlee/mechanize/pull/58
monkeypatch_mechanize()

# Editing client built from the configured bot account and target site.
mb = MusicBrainzClient(cfg.MB_USERNAME, cfg.MB_PASSWORD, cfg.MB_SITE)

# Matches names that start with an MBID, a dash, an image type (group "type"),
# an optional "-<digits>" suffix and an image extension. Matching is
# case-insensitive; the pattern is anchored at the start only, so trailing
# text after the extension is not rejected.
FILE_RE = re.compile(
    r'^(?P<mbid>[a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12})-(?P<type>front|back|medium|booklet|tray|sticker)(?:-\d+)?\.(?:jpeg|jpg|png|gif)',
    re.I)


class CoverArtArchiveReleaseInfo(object):
    def __init__(self, release_id):
        try:
            data = urllib2.urlopen('http://coverartarchive.org/release/%s/' %
                                   release_id)
            self.metadata = json.load(data)
        except urllib2.HTTPError:
            self.metadata = {
                'images': [],
#!/usr/bin/python

import re
import sqlalchemy
from editing import MusicBrainzClient
import time
from utils import out, colored_out, bcolors
import config as cfg

# Connect to the database named by cfg.MB_DB and set the PostgreSQL
# search_path to the musicbrainz schema followed by the bot's own schema.
# The schema name is interpolated into the statement, so cfg.BOT_SCHEMA_DB
# must come from trusted configuration.
engine = sqlalchemy.create_engine(cfg.MB_DB)
db = engine.connect()
db.execute("SET search_path TO musicbrainz, %s" % cfg.BOT_SCHEMA_DB)

# Editing client built from the configured bot account and target site.
mb = MusicBrainzClient(cfg.MB_USERNAME, cfg.MB_PASSWORD, cfg.MB_SITE)

query = """
    SELECT DISTINCT r.id, r.gid, r.name, r.comment, lrw.id AS rel_id, lt.id AS link_type, r.artist_credit
    FROM recording r
        JOIN l_recording_work lrw ON lrw.entity0 = r.id
        JOIN link l ON l.id = lrw.link
        JOIN link_type lt ON l.link_type = lt.id
        JOIN link_attribute la ON la.link = l.id
        JOIN link_attribute_type lat ON la.attribute_type = lat.id AND lat.name = 'live'
    WHERE r.comment ~ E'live, \\\\d{4}(-\\\\d{2})?(-\\\\d{2})?:'
        AND l.begin_date_year IS NULL
        AND l.end_date_year IS NULL
        AND lt.name = 'performance'
        AND r.edits_pending = 0 AND lrw.edits_pending = 0
        /* Only one linked work */
        AND NOT EXISTS (SELECT 1 FROM l_recording_work lrw2 WHERE lrw2.entity0 = r.id AND lrw2.entity1 <> lrw.entity1)
    ORDER BY r.artist_credit
示例#32
0
from simplemediawiki import MediaWiki
from editing import MusicBrainzClient
import pprint
import urllib
import time
from utils import mangle_name, join_names, quote_page_title
import config as cfg

# Connect to the database named by cfg.MB_DB and set the PostgreSQL
# search_path to the musicbrainz schema followed by the bot's own schema.
# The schema name is interpolated into the statement, so cfg.BOT_SCHEMA_DB
# must come from trusted configuration.
engine = sqlalchemy.create_engine(cfg.MB_DB)
db = engine.connect()
db.execute("SET search_path TO musicbrainz, %s" % cfg.BOT_SCHEMA_DB)

# English Wikipedia API endpoint and the local Solr core holding the
# Wikipedia index.
wp = MediaWiki("https://en.wikipedia.org/w/api.php")
wps = solr.SolrConnection("http://localhost:8983/solr/wikipedia")

# Editing client built from the configured bot account and target site.
mb = MusicBrainzClient(cfg.MB_USERNAME, cfg.MB_PASSWORD, cfg.MB_SITE)

"""
CREATE TABLE bot_wp_label (
    gid uuid NOT NULL,
    processed timestamp with time zone DEFAULT now()
);

ALTER TABLE ONLY bot_wp_label
    ADD CONSTRAINT bot_wp_label_pkey PRIMARY KEY (gid);

"""

query = """
WITH
    labels_wo_wikipedia AS (
from simplemediawiki import MediaWiki
from editing import MusicBrainzClient
import pprint
import urllib
import time
from utils import mangle_name, join_names, contains_text_in_script, quote_page_title
import config as cfg

# Connect to the database named by cfg.MB_DB and restrict the PostgreSQL
# search_path to the musicbrainz schema.
engine = sqlalchemy.create_engine(cfg.MB_DB)
db = engine.connect()
db.execute("SET search_path TO musicbrainz")

# Korean Wikipedia API over HTTPS (rather than depending on a redirect from
# plain HTTP), and the local Solr core holding the Korean Wikipedia index.
wp = MediaWiki('https://ko.wikipedia.org/w/api.php')
wps = solr.SolrConnection('http://localhost:8983/solr/wikipedia_ko')

# Editing client built from the configured bot account and target site.
mb = MusicBrainzClient(cfg.MB_USERNAME, cfg.MB_PASSWORD, cfg.MB_SITE)

"""
CREATE TABLE bot_wp_artist_ko (
    gid uuid NOT NULL,
    processed timestamp with time zone DEFAULT now()
);

ALTER TABLE ONLY bot_wp_artist_ko
    ADD CONSTRAINT bot_wp_artist_kokey PRIMARY KEY (gid);
"""

query = """
WITH
    artists_wo_wikipedia AS (
        SELECT a.id
#!/usr/bin/python

import re
import sqlalchemy
from editing import MusicBrainzClient
import time
from utils import out, colored_out, bcolors
import config as cfg

# Connect to the database named by cfg.MB_DB and set the PostgreSQL
# search_path to the musicbrainz schema followed by the bot's own schema.
# The schema name is interpolated into the statement, so cfg.BOT_SCHEMA_DB
# must come from trusted configuration.
engine = sqlalchemy.create_engine(cfg.MB_DB)
db = engine.connect()
db.execute("SET search_path TO musicbrainz, %s" % cfg.BOT_SCHEMA_DB)

# Editing client built from the configured bot account and target site.
mb = MusicBrainzClient(cfg.MB_USERNAME, cfg.MB_PASSWORD, cfg.MB_SITE)

query = """
    SELECT DISTINCT r.id, r.gid, r.name, r.comment, lrw.id AS rel_id, lt.id AS link_type, r.artist_credit
    FROM recording r
        JOIN l_recording_work lrw ON lrw.entity0 = r.id
        JOIN link l ON l.id = lrw.link
        JOIN link_type lt ON l.link_type = lt.id
        JOIN link_attribute la ON la.link = l.id
        JOIN link_attribute_type lat ON la.attribute_type = lat.id AND lat.name = 'live'
    WHERE r.comment ~ E'live, \\\\d{4}(-\\\\d{2})?(-\\\\d{2})?:'
        AND l.begin_date_year IS NULL
        AND l.end_date_year IS NULL
        AND lt.name = 'performance'
        AND r.edits_pending = 0 AND lrw.edits_pending = 0
        /* Only one linked work */
        AND NOT EXISTS (SELECT 1 FROM l_recording_work lrw2 WHERE lrw2.entity0 = r.id AND lrw2.entity1 <> lrw.entity1)
    ORDER BY r.artist_credit
from editing import MusicBrainzClient
from mbbot.source.secondhandsongs import SHSWebService
from picard.similarity import similarity2
from kitchen.text.converters import to_unicode
import pprint
import urllib
import urllib2
import time
from utils import mangle_name, join_names, out, colored_out, bcolors
import config as cfg

# Connect to the database named by cfg.MB_DB and set the PostgreSQL
# search_path to the musicbrainz schema followed by the bot's own schema.
# The schema name is interpolated into the statement, so cfg.BOT_SCHEMA_DB
# must come from trusted configuration.
engine = sqlalchemy.create_engine(cfg.MB_DB)
db = engine.connect()
db.execute("SET search_path TO musicbrainz, %s" % cfg.BOT_SCHEMA_DB)

# Editing client built from the configured bot account and target site.
mb = MusicBrainzClient(cfg.MB_USERNAME, cfg.MB_PASSWORD, cfg.MB_SITE)
# SecondHandSongs web-service wrapper, constructed with its defaults.
shs = SHSWebService()

"""
CREATE TABLE mbbot.bot_shs_link_artist (
    artist uuid NOT NULL,
    processed timestamp with time zone DEFAULT now(),
    CONSTRAINT bot_shs_link_artist_pkey PRIMARY KEY (artist)
);
"""

query = """
WITH
    artists_wo_shs AS (
        SELECT DISTINCT a.id AS artist_id, a.gid AS artist_gid, w.id AS work_id, w.gid AS work_gid, u.url AS shs_url
        FROM artist a
import re
import sqlalchemy
import solr
from editing import MusicBrainzClient
import discogs_client
import pprint
import urllib
import time
from utils import mangle_name, join_names, out, colored_out, bcolors
import config as cfg

# Module-level setup: runs at import time and leaves `db`, `mb` and `discogs` as globals.

# Build a SQLAlchemy engine from the URL in cfg.MB_DB and open one connection.
engine = sqlalchemy.create_engine(cfg.MB_DB)
db = engine.connect()
# Resolve unqualified table names in the `musicbrainz` schema first, then in
# the schema named by cfg.BOT_SCHEMA_DB (PostgreSQL `search_path`).
db.execute("SET search_path TO musicbrainz, %s" % cfg.BOT_SCHEMA_DB)

# Editing client built from the configured username, password and site.
# NOTE(review): presumably the constructor logs in to the site -- confirm in `editing`.
mb = MusicBrainzClient(cfg.MB_USERNAME, cfg.MB_PASSWORD, cfg.MB_SITE)

# Discogs API client object.
# NOTE(review): the single argument looks like the HTTP user-agent string -- confirm
# against the installed discogs_client version.
discogs = discogs_client.Client('MusicBrainzBot/0.1 +https://github.com/murdos/musicbrainz-bot')

# The bare string below is never assigned or executed; it only records the DDL
# of the `bot_discogs_medium_format` table for reference.
# NOTE(review): presumably the table tracks already-processed mediums and must
# be created by hand before the script runs -- confirm.
"""
CREATE TABLE bot_discogs_medium_format (
    medium integer NOT NULL,
    processed timestamp with time zone DEFAULT now(),
    CONSTRAINT bot_discogs_medium_format_pkey PRIMARY KEY (medium)
);
"""

query = """
WITH
    mediums_with_fuzzy_format AS (
        SELECT r.id AS release_id, m.position, m.id AS medium_id, u.url AS discogs_url, m.format
import re
import sqlalchemy
import solr
from editing import MusicBrainzClient
import discogs_client as discogs
import pprint
import urllib
import time
from utils import mangle_name, join_names, out, colored_out, bcolors
import config as cfg

# Module-level setup: runs at import time and leaves `db` and `mb` as globals.

# Build a SQLAlchemy engine from the URL in cfg.MB_DB and open one connection.
engine = sqlalchemy.create_engine(cfg.MB_DB)
db = engine.connect()
# Resolve unqualified table names in the `musicbrainz` schema first, then in
# the schema named by cfg.BOT_SCHEMA_DB (PostgreSQL `search_path`).
db.execute("SET search_path TO musicbrainz, %s" % cfg.BOT_SCHEMA_DB)

# Editing client built from the configured username, password and site.
# NOTE(review): presumably the constructor logs in to the site -- confirm in `editing`.
mb = MusicBrainzClient(cfg.MB_USERNAME, cfg.MB_PASSWORD, cfg.MB_SITE)

# Set the `user_agent` attribute on the discogs_client module (imported as `discogs`).
discogs.user_agent = 'MusicBrainzBot/0.1 +https://github.com/murdos/musicbrainz-bot'

# The bare string below is never assigned or executed; it only records the DDL
# of the `bot_discogs_release_packaging` table for reference.
# NOTE(review): presumably the table tracks already-processed releases and must
# be created by hand before the script runs -- confirm.
"""
CREATE TABLE bot_discogs_release_packaging (
    release uuid NOT NULL,
    processed timestamp with time zone DEFAULT now(),
    CONSTRAINT bot_discogs_release_packaging_pkey PRIMARY KEY (release)
);
"""

query = """
WITH
    releases_wo_packaging AS (
        SELECT r.id AS release_id, u.url AS discogs_url
from simplemediawiki import MediaWiki
from editing import MusicBrainzClient
import pprint
import urllib
import time
from utils import mangle_name, join_names, contains_text_in_script, quote_page_title
import config as cfg

# Module-level setup: runs at import time and leaves `db`, `wp`, `wps` and `mb` as globals.

# Build a SQLAlchemy engine from the URL in cfg.MB_DB and open one connection.
engine = sqlalchemy.create_engine(cfg.MB_DB)
db = engine.connect()
# Resolve unqualified table names in the `musicbrainz` schema first, then in
# the schema named by cfg.BOT_SCHEMA_DB (PostgreSQL `search_path`).
db.execute("SET search_path TO musicbrainz, %s" % cfg.BOT_SCHEMA_DB)

# MediaWiki API client pointed at the Korean-language Wikipedia endpoint.
wp = MediaWiki('http://ko.wikipedia.org/w/api.php')
# Solr connection to the `wikipedia_ko` core on a Solr server running on localhost:8983.
wps = solr.SolrConnection('http://localhost:8983/solr/wikipedia_ko')

# Editing client built from the configured username, password and site.
# NOTE(review): presumably the constructor logs in to the site -- confirm in `editing`.
mb = MusicBrainzClient(cfg.MB_USERNAME, cfg.MB_PASSWORD, cfg.MB_SITE)

# The bare string below is never assigned or executed; it only records the DDL
# of the `bot_wp_artist_ko` table (and its primary key on `gid`) for reference.
# NOTE(review): presumably the table tracks already-processed artists and must
# be created by hand before the script runs -- confirm.
"""
CREATE TABLE bot_wp_artist_ko (
    gid uuid NOT NULL,
    processed timestamp with time zone DEFAULT now()
);

ALTER TABLE ONLY bot_wp_artist_ko
    ADD CONSTRAINT bot_wp_artist_kokey PRIMARY KEY (gid);
"""

query = """
WITH
    artists_wo_wikipedia AS (
        SELECT a.id
import re
import random
import locale
from collections import defaultdict
import itertools
import sqlalchemy
import discogs_client as discogs
from editing import MusicBrainzClient
import Levenshtein
import config as cfg

# Module-level setup: runs at import time and leaves `db` and `mb` as globals.

# Build a SQLAlchemy engine from the URL in cfg.MB_DB and open one connection.
engine = sqlalchemy.create_engine(cfg.MB_DB)
db = engine.connect()
# Resolve unqualified table names in the `musicbrainz` schema only
# (PostgreSQL `search_path`); no bot schema is added here.
db.execute('SET search_path TO musicbrainz')

# Editing client built from the configured username, password and site.
# NOTE(review): presumably the constructor logs in to the site -- confirm in `editing`.
mb = MusicBrainzClient(cfg.MB_USERNAME, cfg.MB_PASSWORD, cfg.MB_SITE)

# Set the `user_agent` attribute on the discogs_client module (imported as `discogs`).
discogs.user_agent = 'MusicBrainzDiscogsReleaseGroupsBot/0.1 +https://github.com/weisslj/musicbrainz-bot'

query_rg_without_master = '''
SELECT rg.artist_credit, rg.id, rg.gid, release_name.name
FROM release_group rg
JOIN release_name ON rg.name = release_name.id
WHERE rg.id IN (
    SELECT DISTINCT rg.id
    FROM release_group rg
    JOIN release ON rg.id = release.release_group
    JOIN l_release_url l_ru ON release.id = l_ru.entity0
    JOIN link l ON l_ru.link = l.id
    WHERE l.link_type = 76
    
示例#40
0
def init_mb():
    """Create the MusicBrainz client and store it in the module global ``mb``.

    Prints a short progress message first, then builds a ``MusicBrainzClient``
    from the username, password and site held in ``config``.
    """
    global mb
    # Parenthesised form prints the same text under the Python 2 print statement.
    print("Logging in...")
    credentials = (config.MB_USERNAME, config.MB_PASSWORD, config.MB_SITE)
    mb = MusicBrainzClient(*credentials)
示例#41
0
import re
import sqlalchemy
import solr
from editing import MusicBrainzClient
import discogs_client as discogs
import pprint
import urllib
import time
from utils import mangle_name, join_names, out, colored_out, bcolors
import config as cfg

# Module-level setup: runs at import time and leaves `db` and `mb` as globals.

# Build a SQLAlchemy engine from the URL in cfg.MB_DB and open one connection.
engine = sqlalchemy.create_engine(cfg.MB_DB)
db = engine.connect()
# Resolve unqualified table names in the `musicbrainz` schema first, then in
# the schema named by cfg.BOT_SCHEMA_DB (PostgreSQL `search_path`).
db.execute("SET search_path TO musicbrainz, %s" % cfg.BOT_SCHEMA_DB)

# Editing client built from the configured username, password and site.
# NOTE(review): presumably the constructor logs in to the site -- confirm in `editing`.
mb = MusicBrainzClient(cfg.MB_USERNAME, cfg.MB_PASSWORD, cfg.MB_SITE)

# Set the `user_agent` attribute on the discogs_client module (imported as `discogs`).
discogs.user_agent = 'MusicBrainzBot/0.1 +https://github.com/murdos/musicbrainz-bot'

# The bare string below is never assigned or executed; it only records the DDL
# of the `bot_discogs_medium_format` table for reference.
# NOTE(review): presumably the table tracks already-processed mediums and must
# be created by hand before the script runs -- confirm.
"""
CREATE TABLE bot_discogs_medium_format (
    medium integer NOT NULL,
    processed timestamp with time zone DEFAULT now(),
    CONSTRAINT bot_discogs_medium_format_pkey PRIMARY KEY (medium)
);
"""

query = """
WITH
    mediums_with_fuzzy_format AS (
        SELECT r.id AS release_id, m.position, m.id AS medium_id, u.url AS discogs_url, m.format
示例#42
0
    'artist': 352,
    'label': 354,
    'release-group': 353,
    'work': 351,
    'area': 358,
    'place': 594,
    'series': 749,
    'instrument': 733,
    'event': 790
}

# Module-level setup: runs at import time and leaves `db` and `mb` as globals.

# Build a SQLAlchemy engine from the URL in cfg.MB_DB and open one connection.
engine = sqlalchemy.create_engine(cfg.MB_DB)
db = engine.connect()
# Resolve unqualified table names in the `musicbrainz` schema first, then in
# the schema named by cfg.BOT_SCHEMA_DB (PostgreSQL `search_path`).
db.execute("SET search_path TO musicbrainz, %s" % cfg.BOT_SCHEMA_DB)

# Editing client built from the configured username, password and site.
# NOTE(review): presumably the constructor logs in to the site -- confirm in `editing`.
mb = MusicBrainzClient(cfg.MB_USERNAME, cfg.MB_PASSWORD, cfg.MB_SITE)
# The bare string below is never assigned or executed; it only records the DDL
# of the `mbbot.bot_wp_wikidata_links` table (keyed on gid + lang) for reference.
# NOTE(review): presumably the table tracks already-processed entities per
# language and must be created by hand before the script runs -- confirm.
"""
CREATE TABLE mbbot.bot_wp_wikidata_links (
    gid uuid NOT NULL,
    lang character varying(10),
    processed timestamp with time zone DEFAULT now(),
    CONSTRAINT bot_wp_wikidata_links_pkey PRIMARY KEY (gid, lang)
);
"""


def main(ENTITY_TYPE):

    entity_type_table = ENTITY_TYPE.replace('-', '_')
    url_relationship_table = 'l_%s_url' % entity_type_table if ENTITY_TYPE != 'work' else 'l_url_%s' % entity_type_table
    main_entity_entity_point = "entity0" if ENTITY_TYPE != 'work' else "entity1"
#!/usr/bin/python

import sys
import os
import re
import time
import urllib2
import json
from editing import MusicBrainzClient
from utils import out, colored_out, bcolors, monkeypatch_mechanize
import config as cfg

# Work around mechanize bug. See: https://github.com/jjlee/mechanize/pull/58
# The patch is applied before the client below is constructed.
monkeypatch_mechanize()

# Editing client built from the configured username, password and site.
# NOTE(review): presumably the constructor logs in to the site -- confirm in `editing`.
mb = MusicBrainzClient(cfg.MB_USERNAME, cfg.MB_PASSWORD, cfg.MB_SITE)

# Image file names of the form "<mbid>-<type>[-<number>].<ext>", matched
# case-insensitively (re.I). Named groups: `mbid` (a UUID-shaped hex string)
# and `type` (front, back, medium, booklet or tray).
# The pattern is anchored at the start only; there is no `$`, so text after
# the extension is not rejected.
FILE_RE = re.compile(r'^(?P<mbid>[a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12})-(?P<type>front|back|medium|booklet|tray)(?:-\d+)?\.(?:jpeg|jpg|png|gif)', re.I)

class CoverArtArchiveReleaseInfo(object):
	def __init__(self, release_id):
		"""Load the Cover Art Archive metadata for a release.

		Requests http://coverartarchive.org/release/<release_id>/ and stores the
		decoded JSON document in ``self.metadata``. If the request raises
		``urllib2.HTTPError``, ``self.metadata`` is set to a placeholder with an
		empty ``images`` list and the release's musicbrainz.org URL.

		release_id -- value interpolated into the request URL and into the
		              placeholder's ``release`` URL.
		"""
		try:
			response = urllib2.urlopen('http://coverartarchive.org/release/%s/' % release_id)
			try:
				self.metadata = json.load(response)
			finally:
				# urllib2 responses are not context managers on Python 2, so
				# close the connection explicitly instead of leaking it.
				response.close()
		except urllib2.HTTPError:
			# Same handling as before: any HTTP error status is treated as
			# "no cover art metadata available".
			self.metadata = {'images':  [], 'release': 'http://musicbrainz.org/release/%s' % release_id}

	def hasType(self, type):
		for image in self.metadata['images']:
			for img_type in image['types']:
				if img_type.lower() == type.lower():
#!/usr/bin/python

import re
import sqlalchemy
from editing import MusicBrainzClient
import discogs_client
import time
import Levenshtein
from utils import mangle_name, join_names, out, colored_out, bcolors, durationToMS, msToDuration, unaccent
import config as cfg

# Module-level setup: runs at import time and leaves `db`, `mb` and `discogs` as globals.

# Build a SQLAlchemy engine from the URL in cfg.MB_DB and open one connection.
engine = sqlalchemy.create_engine(cfg.MB_DB)
db = engine.connect()
# Resolve unqualified table names in the `musicbrainz` schema first, then in
# the schema named by cfg.BOT_SCHEMA_DB (PostgreSQL `search_path`).
db.execute("SET search_path TO musicbrainz, %s" % cfg.BOT_SCHEMA_DB)

# Editing client built from the configured username, password and site.
# NOTE(review): presumably the constructor logs in to the site -- confirm in `editing`.
mb = MusicBrainzClient(cfg.MB_USERNAME, cfg.MB_PASSWORD, cfg.MB_SITE)

# Discogs API client object.
# NOTE(review): the single argument looks like the HTTP user-agent string -- confirm
# against the installed discogs_client version.
discogs = discogs_client.Client('MusicBrainzBot/0.1 +https://github.com/murdos/musicbrainz-bot')

# The bare string below is never assigned or executed; it only records the DDL
# of the `bot_discogs_track_number` table for reference.
# NOTE(review): presumably the table tracks already-processed entities by gid
# and must be created by hand before the script runs -- confirm.
"""
CREATE TABLE bot_discogs_track_number (
    gid uuid NOT NULL,
    processed timestamp with time zone DEFAULT now(),
    CONSTRAINT bot_discogs_track_number_pkey PRIMARY KEY (gid)
);
"""

query = """
WITH
    vinyl_releases AS (
        SELECT DISTINCT r.id, u.url AS discogs_url
#!/usr/bin/python

import re
import sqlalchemy
from editing import MusicBrainzClient
import discogs_client as discogs
import time
import Levenshtein
from utils import mangle_name, join_names, out, colored_out, bcolors, durationToMS, msToDuration, unaccent
import config as cfg

# Module-level setup: runs at import time and leaves `db` and `mb` as globals.

# Build a SQLAlchemy engine from the URL in cfg.MB_DB and open one connection.
engine = sqlalchemy.create_engine(cfg.MB_DB)
db = engine.connect()
# Resolve unqualified table names in the `musicbrainz` schema first, then in
# the schema named by cfg.BOT_SCHEMA_DB (PostgreSQL `search_path`).
db.execute("SET search_path TO musicbrainz, %s" % cfg.BOT_SCHEMA_DB)

# Editing client built from the configured username, password and site.
# NOTE(review): presumably the constructor logs in to the site -- confirm in `editing`.
mb = MusicBrainzClient(cfg.MB_USERNAME, cfg.MB_PASSWORD, cfg.MB_SITE)

# Set the `user_agent` attribute on the discogs_client module (imported as `discogs`).
discogs.user_agent = 'MusicBrainzBot/0.1 +https://github.com/murdos/musicbrainz-bot'
# The bare string below is never assigned or executed; it only records the DDL
# of the `bot_discogs_track_number` table for reference.
# NOTE(review): presumably the table tracks already-processed entities by gid
# and must be created by hand before the script runs -- confirm.
"""
CREATE TABLE bot_discogs_track_number (
    gid uuid NOT NULL,
    processed timestamp with time zone DEFAULT now(),
    CONSTRAINT bot_discogs_track_number_pkey PRIMARY KEY (gid)
);
"""

query = """
WITH
    vinyl_releases AS (
        SELECT DISTINCT r.id, u.url AS discogs_url
        FROM release r