def main():
    """Look up every MBID in batches of 100 with the latest musicbrainz scraper.

    Initialises the database engine, loads the "musicbrainz" source and its
    latest scraper, then passes the MBIDs returned by ``get_mbids()`` to
    ``lookup`` one chunk at a time, logging progress after each chunk.
    """
    db.init_db_engine(config.SQLALCHEMY_DATABASE_URI)
    source = data.load_source("musicbrainz")
    scraper = data.load_latest_scraper_for_source(source)
    print(scraper)

    mbids = get_mbids()
    total = len(mbids)
    starttime = time.time()
    done = 0
    for mblist in util.chunks(mbids, 100):
        lookup(mblist, scraper)
        # Count what was actually handled: the final chunk may hold fewer
        # than 100 items, and adding a flat 100 would push `done` past
        # `total` and distort the remaining-time estimate.
        done += len(mblist)
        durdelta, remdelta = util.stats(done, total, starttime)
        log.info("Done %s/%s in %s; %s remaining", done, total, str(durdelta), str(remdelta))
def process_file(module, filename, numworkers, save=False):
    """Run ``process_items`` over every row of a CSV file, one chunk at a time.

    Args:
        module: Passed through unchanged to ``process_items``.
        filename: Path of a CSV file with a header row; each row is read as
            a dict via ``csv.DictReader``.
        numworkers: Passed through unchanged to ``process_items``.
        save: Passed through unchanged to ``process_items``.
    """
    # newline="" lets the csv module do its own newline handling, so quoted
    # fields that contain line breaks are parsed correctly.
    with open(filename, newline="") as csvfile:
        rows = list(csv.DictReader(csvfile))

    total = len(rows)
    starttime = time.time()
    done = 0
    CHUNK_SIZE = 1
    for items in util.chunks(rows, CHUNK_SIZE):
        process_items(items, module, save, numworkers)
        # Track the real number of processed rows so the progress figures
        # stay correct if CHUNK_SIZE is raised and the last chunk is short.
        done += len(items)
        durdelta, remdelta = util.stats(done, total, starttime)
        # Random pause of 0.5-1.5 s between chunks.
        time.sleep(random.uniform(.5, 1.5))
        log.info("Done %s/%s in %s; %s remaining", done, total, str(durdelta), str(remdelta))
def main():
    """Process saved musicbrainz scraper data for all recordings in batches.

    Recordings from ``get_recordings()`` are split into batches of 10000.
    For each batch a database connection is opened, the rows stored for the
    latest musicbrainz scraper are fetched with ``get_data`` and handed to
    ``process``; progress is logged after every batch.
    """
    db.init_db_engine(config.SQLALCHEMY_DATABASE_URI)
    musicbrainz = data.load_source("musicbrainz")
    scraper = data.load_latest_scraper_for_source(musicbrainz)

    all_recordings = get_recordings()
    batch_size = 10000
    total = len(all_recordings)
    processed = 0
    started_at = time.time()
    log.info("starting..., %s recordings to process", total)

    for batch in util.chunks(all_recordings, batch_size):
        log.info("have %s recordings", len(batch))
        with db.engine.connect() as connection:
            rows = get_data(connection, scraper["id"], batch)
            log.info(" - got %s rows matching them", len(rows))
            process(connection, rows)

        processed += len(batch)
        elapsed, remaining = util.stats(processed, total, started_at)
        log.info("Done %s/%s in %s; %s remaining", processed, total, str(elapsed), str(remaining))
def main():
    """Export release-group and recording metadata to CSV files.

    Writes all release groups from ``get_rgs()`` to ``release-group-meta.csv``
    and spreads the recordings from ``get_recordings()`` over up to eight
    files named ``recording-meta-<n>.csv`` (n starting at 1).
    """
    db.init_db_engine(config.SQLALCHEMY_DATABASE_URI)

    log.info("Release groups")
    releasegroups = get_rgs()
    fieldnames = ["mbid", "release_title", "artist", "year"]
    # newline="" is required for csv writers: without it, platforms that
    # translate "\n" turn the writer's "\r\n" into "\r\r\n" (blank rows).
    with open("release-group-meta.csv", "w", newline="") as fp:
        w = csv.DictWriter(fp, fieldnames=fieldnames)
        w.writeheader()
        w.writerows(releasegroups)

    log.info("Recordings")
    recordings = get_recordings()
    fieldnames = ["mbid", "recording", "artist"]
    # Chunk size chosen so the recordings fit in at most eight files.
    count = (len(recordings) // 8) + 1
    for i, reclist in enumerate(util.chunks(recordings, count), 1):
        with open("recording-meta-{}.csv".format(i), "w", newline="") as fp:
            w = csv.DictWriter(fp, fieldnames=fieldnames)
            w.writeheader()
            w.writerows(reclist)
def main():
    """Dump release-group and recording metadata into CSV files.

    Release groups returned by ``get_rgs()`` go to ``release-group-meta.csv``.
    Recordings returned by ``get_recordings()`` are divided into chunks and
    written to ``recording-meta-<n>.csv`` files, numbered from 1, with at
    most eight files produced.
    """
    db.init_db_engine(config.SQLALCHEMY_DATABASE_URI)

    log.info("Release groups")
    releasegroups = get_rgs()
    fieldnames = ["mbid", "release_title", "artist", "year"]
    # Open with newline="" as the csv module requires; otherwise newline
    # translation can double the "\r" of the writer's line terminator.
    with open("release-group-meta.csv", "w", newline="") as fp:
        w = csv.DictWriter(fp, fieldnames=fieldnames)
        w.writeheader()
        w.writerows(releasegroups)

    log.info("Recordings")
    recordings = get_recordings()
    fieldnames = ["mbid", "recording", "artist"]
    # One eighth of the recordings (rounded up past the floor) per file.
    count = (len(recordings) // 8) + 1
    for i, reclist in enumerate(util.chunks(recordings, count), 1):
        with open("recording-meta-{}.csv".format(i), "w", newline="") as fp:
            w = csv.DictWriter(fp, fieldnames=fieldnames)
            w.writeheader()
            w.writerows(reclist)