def delete(track_ids, do_commit=True, local=False):
    # Delete one or more track_ids from the fp flat.
    if not isinstance(track_ids, list):
        track_ids = [track_ids]

    # Delete the codes from the FP flat.
    if local:
        return local_delete(track_ids)

    with solr.pooled_connection(_fp_solr) as host:
        for t in track_ids:
            host.delete_query("track_id:%s*" % t)

    # Codes live in tyrant as up to 20 split segments per track, keyed
    # "<track_id>-<n>", so expand each track_id into its segment keys.
    get_tyrant_lock().acquire()
    try:
        delete_list = [["%s-%s" % (track_id, i) for i in range(20)]
                       for track_id in track_ids]
        delete_list = sum(delete_list, [])
        get_tyrant().multi_del(delete_list)
    except KeyError:
        pass
    finally:
        get_tyrant_lock().release()

    if do_commit:
        commit()
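# A sketch of the key fan-out above, with a hypothetical track id: calling
# delete(["TRABC123"]) removes the solr documents matching track_id:TRABC123*
# and the tyrant keys "TRABC123-0" through "TRABC123-19", one key per split
# code segment.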
def ingest(fingerprint_list, do_commit=True, local=False):
    """ Ingest some fingerprints into the fingerprint database. The fingerprints
        should be of the form
          {"track_id": id, "fp": fp, "artist": artist, "release": release,
           "track": track, "length": length, "codever": "codever"}
        or a list of the same.

        All parameters except length must be strings. Length is an integer.
        artist, release and track are not required but highly recommended.
        length is the length of the track being ingested, in seconds.
        If track_id is empty, one will be generated.
    """
    if not isinstance(fingerprint_list, list):
        fingerprint_list = [fingerprint_list]

    docs = []
    codes = []
    for fprint in fingerprint_list:
        if not ("track_id" in fprint and "fp" in fprint
                and "length" in fprint and "codever" in fprint):
            raise Exception("Missing required fingerprint parameters "
                            "(track_id, fp, length, codever)")
        split_prints = split_codes(fprint)
        docs.extend(split_prints)
        codes.extend(((c["track_id"].encode("utf-8"), c["fp"].encode("utf-8"))
                      for c in split_prints))

    if local:
        return local_ingest(docs, codes)

    with solr.pooled_connection(_fp_solr) as host:
        host.add_many(docs)

    get_tyrant().multi_set(codes)

    if do_commit:
        commit()
def ingest(fingerprint_list, do_commit=True, local=False, split=True):
    """ Ingest some fingerprints into the fingerprint database. The fingerprints
        should be of the form
          {"track_id": id, "fp": fp string, "artist": artist, "release": release,
           "track": track, "length": length, "codever": "codever",
           "source": source, "import_date": import date}
        or a list of the same.

        All parameters except length must be strings. Length is an integer.
        artist, release and track are not required but highly recommended.
        The import date should be formatted as an ISO 8601 date
        (yyyy-mm-ddThh:mm:ssZ) and should be the UTC time that the import was
        performed. If the date is missing, the time the script was started
        will be used.
        length is the length of the track being ingested, in seconds.
        If track_id is empty, one will be generated.
    """
    if not isinstance(fingerprint_list, list):
        fingerprint_list = [fingerprint_list]

    docs = []
    codes = []
    if split:
        for fprint in fingerprint_list:
            if not ("track_id" in fprint and "fp" in fprint
                    and "length" in fprint and "codever" in fprint):
                raise Exception("Missing required fingerprint parameters "
                                "(track_id, fp, length, codever)")
            if "import_date" not in fprint:
                fprint["import_date"] = IMPORTDATE
            if "source" not in fprint:
                fprint["source"] = "local"
            split_prints = split_codes(fprint)
            docs.extend(split_prints)
            codes.extend(((c["track_id"].encode("utf-8"), c["fp"].encode("utf-8"))
                          for c in split_prints))
    else:
        docs.extend(fingerprint_list)
        codes.extend(((c["track_id"].encode("utf-8"), c["fp"].encode("utf-8"))
                      for c in fingerprint_list))

    if local:
        return local_ingest(docs, codes)

    with solr.pooled_connection(_fp_solr) as host:
        host.add_many(docs)

    get_tyrant_lock().acquire()
    try:
        get_tyrant().multi_set(codes)
    finally:
        get_tyrant_lock().release()

    if do_commit:
        commit()
def check_for_fields():
    with solr.pooled_connection(fp._fp_solr) as host:
        results = host.query("-source:[* TO *]", rows=1, score=False)
        if len(results) > 0:
            print >> sys.stderr, "Missing 'source' field on at least one doc. Run util/upgrade_server.py"
            sys.exit(1)
        results = host.query("-import_date:[* TO *]", rows=1, score=False)
        if len(results) > 0:
            print >> sys.stderr, "Missing 'import_date' field on at least one doc. Run util/upgrade_server.py"
            sys.exit(1)
def ingest(fingerprint_list, do_commit=True, local=False, split=True):
    """ Ingest some fingerprints into the fingerprint database. The fingerprints
        should be of the form
          {"track_id": id, "fp": fp string, "artist": artist, "release": release,
           "track": track, "length": length, "codever": "codever",
           "source": source, "import_date": import date}
        or a list of the same.

        All parameters except length must be strings. Length is an integer.
        artist, release and track are not required but highly recommended.
        The import date should be formatted as an ISO 8601 date
        (yyyy-mm-ddThh:mm:ssZ) and should be the UTC time that the import was
        performed. If the date is missing, the time the script was started
        will be used.
        length is the length of the track being ingested, in seconds.
        If track_id is empty, one will be generated.
    """
    if not isinstance(fingerprint_list, list):
        fingerprint_list = [fingerprint_list]

    docs = []
    codes = []
    if split:
        for fprint in fingerprint_list:
            if not ("track_id" in fprint and "fp" in fprint
                    and "length" in fprint and "codever" in fprint):
                raise Exception("Missing required fingerprint parameters "
                                "(track_id, fp, length, codever)")
            if "import_date" not in fprint:
                fprint["import_date"] = IMPORTDATE
            if "source" not in fprint:
                fprint["source"] = "local"
            split_prints = split_codes(fprint)
            docs.extend(split_prints)
            codes.extend(((c["track_id"].encode("utf-8"), c["fp"].encode("utf-8"))
                          for c in split_prints))
    else:
        docs.extend(fingerprint_list)
        codes.extend(((c["track_id"].encode("utf-8"), c["fp"].encode("utf-8"))
                      for c in fingerprint_list))

    if local:
        return local_ingest(docs, codes)

    get_tyrant_lock().acquire()
    try:
        get_tyrant().multi_set(codes)
    finally:
        get_tyrant_lock().release()

    with solr.pooled_connection(_fp_solr) as host:
        host.add_many(docs)

    if do_commit:
        commit()
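# Example usage (a sketch; the id, code string, and codever below are
# hypothetical, and the call assumes this module's solr/tyrant connections
# are already configured):
#
#   ingest({"track_id": "TRHYPOTHET12345",
#           "fp": "<fp code string from the codegen>",
#           "artist": "Some Artist",
#           "release": "Some Release",
#           "track": "Some Track",
#           "length": 300,                           # integer, seconds
#           "codever": "4.12",
#           "source": "local",
#           "import_date": "2011-01-01T00:00:00Z"},  # ISO 8601, UTC
#          do_commit=True)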
def query_fp(self, code_string, rows=15, get_data=False):
    try:
        # Query the fp flat.
        if get_data:
            fields = "track_id,artist,release,track,length"
        else:
            fields = "track_id"
        with solr.pooled_connection(self._fp_solr) as host:
            resp = host.query(code_string, qt="/hashq", rows=rows, fields=fields)
        return resp
    except solr.SolrException:
        return None
def erase_database(self, really_delete=False):
    """ This method will delete your ENTIRE database. Only use it if you
        know what you're doing.
    """
    if not really_delete:
        raise Exception("Won't delete unless you pass in really_delete=True")
    with solr.pooled_connection(self._fp_solr) as host:
        host.delete_query("*:*")
        host.commit()
    self.tyrant.multi_del(self.tyrant.keys())
def metadata_for_track_id(self, track_id, append_end=True):
    if not track_id or not len(track_id):
        return {}
    # Assume track_ids contain one "-", at the end of the id.
    if append_end:
        track_id = "%s-0" % track_id
    with solr.pooled_connection(self._fp_solr) as host:
        response = host.query("track_id:%s" % track_id)
    if len(response.results):
        return response.results[0]
    return {}
def main():
    print "setting source to '%s', import date to %s" % (SOURCE, IMPORTDATE)
    with solr.pooled_connection(fp._fp_solr) as host:
        # Find rows where the source field doesn't exist.
        results = host.query("-source:[* TO *]", rows=ROWS_PER_QUERY, score=False)
        resultlen = len(results)
        while resultlen > 0:
            print "got", resultlen, "results"
            processed = process_results(results.results)
            host.add_many(processed)
            host.commit()
            results = host.query("-source:[* TO *]", rows=ROWS_PER_QUERY, score=False)
            resultlen = len(results)
    print "done"
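# Note that the loop above converges: each pass stamps the missing fields onto
# the docs it processes (via process_results), so the "-source:[* TO *]" query
# matches fewer documents on every iteration. Presumably this is the body of
# the util/upgrade_server.py script that check_for_fields() tells you to run.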
def query_fp(code_string, rows=15, local=False, get_data=False):
    if local:
        return local_query_fp(code_string, rows, get_data=get_data)
    try:
        # Query the fp flat.
        if get_data:
            fields = "track_id,artist,release,track,length,youtube,characters"
        else:
            fields = "track_id"
        with solr.pooled_connection(_fp_solr) as host:
            resp = host.query(code_string, qt="/hashq", rows=rows, fields=fields)
        return resp
    except solr.SolrException:
        return None
def query_fp(code_string, rows=15, local=False, get_data=False):
    if local:
        return local_query_fp(code_string, rows, get_data=get_data)
    try:
        # Query the fp flat.
        if get_data:
            fields = "track_id,artist,release,track,length"
        else:
            fields = "track_id"
        with solr.pooled_connection(_fp_solr) as host:
            resp = host.query(code_string, qt="/hashq", rows=rows, fields=fields)
        return resp
    except solr.SolrException:
        return None
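# Example usage (a sketch; the code string is hypothetical and would normally
# be the decoded hash string produced upstream of this call):
#
#   resp = query_fp("<decoded code string>", rows=30, get_data=True)
#   if resp is not None:
#       for match in resp.results:
#           print match["track_id"], match.get("track", "")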
def delete(self, track_ids, do_commit=True):
    # Delete one or more track_ids from the fp flat.
    if not isinstance(track_ids, list):
        track_ids = [track_ids]
    with solr.pooled_connection(self._fp_solr) as host:
        for t in track_ids:
            host.delete_query("track_id:%s*" % t)
    try:
        self.tyrant.multi_del(track_ids)
    except KeyError:
        pass
    if do_commit:
        self.commit()
def erase_database(really_delete=False, local=False):
    """ This method will delete your ENTIRE database. Only use it if you
        know what you're doing.
    """
    if not really_delete:
        raise Exception("Won't delete unless you pass in really_delete=True")
    if local:
        return local_erase_database()
    with solr.pooled_connection(_fp_solr) as host:
        host.delete_query("*:*")
        host.commit()
    tyrant = get_tyrant()
    tyrant.multi_del(tyrant.keys())
def metadata_for_track_id(track_id, local=False):
    if not track_id or not len(track_id):
        return {}
    # Assume track_ids contain one "-", at the end of the id; append the
    # "-0" segment suffix if it's missing.
    if "-" not in track_id:
        track_id = "%s-0" % track_id

    if local:
        return _fake_solr["metadata"][track_id]

    with solr.pooled_connection(_fp_solr) as host:
        response = host.query("track_id:%s" % track_id)
    if len(response.results):
        return response.results[0]
    else:
        return {}
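# Example usage (a sketch with a hypothetical id): codes are stored in split
# segments, so the metadata is read from the "-0" document.
#
#   meta = metadata_for_track_id("TRABC123")   # queries track_id:TRABC123-0
#   print meta.get("artist", ""), meta.get("track", "")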
def erase_database(really_delete=False, local=False):
    """ This method will delete your ENTIRE database. Only use it if you
        know what you're doing.
    """
    if not really_delete:
        raise Exception("Won't delete unless you pass in really_delete=True")
    if local:
        return local_erase_database()
    with solr.pooled_connection(_fp_solr) as host:
        host.delete_query("*:*")
        host.commit()
    tyrant = get_tyrant()
    get_tyrant_lock().acquire()
    try:
        tyrant.multi_del(tyrant.keys())
    finally:
        get_tyrant_lock().release()
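# Guard-rail sketch: the really_delete flag means a bare call can never wipe
# anything by accident.
#
#   erase_database()                     # raises Exception, nothing deleted
#   erase_database(really_delete=True)   # wipes solr *and* all tyrant keys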
def delete(track_ids, do_commit=True, local=False):
    # Delete one or more track_ids from the fp flat.
    if not isinstance(track_ids, list):
        track_ids = [track_ids]

    # Delete the codes from the FP flat.
    if local:
        return local_delete(track_ids)

    with solr.pooled_connection(_fp_solr) as host:
        for t in track_ids:
            host.delete_query("track_id:%s*" % t)

    try:
        get_tyrant().multi_del(track_ids)
    except KeyError:
        pass

    if do_commit:
        commit()
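# Example usage (hypothetical id): the trailing "*" in the solr query means
# delete("TRABC123") removes every split document ("TRABC123-0",
# "TRABC123-1", ...), while the tyrant delete uses the bare track_id key.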
def dump(start=0):
    try:
        lastdump = tyrant["lastdump"]
    except KeyError:
        lastdump = "*"
    filecount = 1
    itemcount = 1
    filename = FILENAME_TEMPLATE % (now, filecount)
    writer = csv.writer(open(filename, "w"))
    with solr.pooled_connection(fp._fp_solr) as host:
        items_to_dump = host.query("import_date:[%s TO %s]" % (lastdump, now),
                                   rows=10000, start=start)
        print "going to dump %s entries" % items_to_dump.results.numFound
        resultlen = len(items_to_dump)
        while resultlen > 0:
            print "writing %d results from start=%s" % (resultlen,
                                                        items_to_dump.results.start)
            for r in items_to_dump.results:
                row = [r["track_id"],
                       r["codever"],
                       tyrant[str(r["track_id"])],
                       r["length"],
                       r.get("artist", ""),
                       r.get("release", ""),
                       r.get("track", "")]
                writer.writerow(row)
            itemcount += resultlen
            if itemcount > ITEMS_PER_FILE:
                filecount += 1
                filename = FILENAME_TEMPLATE % (now, filecount)
                print "Making new file, %s" % filename
                writer = csv.writer(open(filename, "w"))
                itemcount = resultlen
            items_to_dump = items_to_dump.next_batch()
            resultlen = len(items_to_dump)
    # Write the final completion time.
    tyrant["lastdump"] = now
def dump(start=0):
    try:
        # lastdump = tyrant["lastdump"]
        lastdump = "*"
    except KeyError:
        lastdump = "*"
    filecount = 1
    itemcount = 1
    filename = FILENAME_TEMPLATE % (now, filecount)
    writer = csv.writer(open(filename, "w"))
    with solr.pooled_connection(fp._fp_solr) as host:
        items_to_dump = host.query("import_date:[%s TO %s]" % (lastdump, now),
                                   rows=10000, start=start)
        print "going to dump %s entries" % items_to_dump.results.numFound
        resultlen = len(items_to_dump)
        while resultlen > 0:
            print "writing %d results from start=%s" % (resultlen,
                                                        items_to_dump.results.start)
            for r in items_to_dump.results:
                row = [r["track_id"],
                       r["codever"],
                       tyrant[str(r["track_id"])],
                       r["length"],
                       r.get("artist", ""),
                       r.get("release", ""),
                       r.get("track", "")]
                writer.writerow(row)
            itemcount += resultlen
            if itemcount > ITEMS_PER_FILE:
                filecount += 1
                filename = FILENAME_TEMPLATE % (now, filecount)
                print "Making new file, %s" % filename
                writer = csv.writer(open(filename, "w"))
                itemcount = resultlen
            items_to_dump = items_to_dump.next_batch()
            resultlen = len(items_to_dump)
    # Write the final completion time.
    tyrant["lastdump"] = now
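# Example usage (a sketch; FILENAME_TEMPLATE, ITEMS_PER_FILE, and `now` are
# module-level settings assumed to be defined above):
#
#   dump()             # dump all entries in the import_date range to CSV files
#   dump(start=50000)  # resume, skipping the first 50000 solr results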
def commit(self):
    with solr.pooled_connection(self._fp_solr) as host:
        host.commit()
def commit(local=False):
    with solr.pooled_connection(_fp_solr) as host:
        host.commit()