Пример #1
0
def delete(track_ids, do_commit=True, local=False):
    """Delete one or more track_ids from the fp flat.

    track_ids may be a single id or a list of ids. Each id is removed
    from the solr index (prefix wildcard query) and from the tyrant
    store, where codes are sharded under keys "<track_id>-0" through
    "<track_id>-19".
    """
    if not isinstance(track_ids, list):
        track_ids = [track_ids]

    # The local (in-process fake) backend handles deletion itself.
    if local:
        return local_delete(track_ids)

    with solr.pooled_connection(_fp_solr) as host:
        for t in track_ids:
            host.delete_query("track_id:%s*" % t)

    # Acquire BEFORE entering try: if acquire() itself fails we must not
    # release a lock we never held (the original acquired inside try).
    get_tyrant_lock().acquire()
    try:
        # Build the flat shard-key list directly instead of nested map()
        # plus quadratic sum(list_of_lists, []).
        delete_list = ["%s-%s" % (track_id, i)
                       for track_id in track_ids
                       for i in range(20)]
        get_tyrant().multi_del(delete_list)
    except KeyError:
        # Missing shard keys are fine; deletion is best-effort.
        pass
    finally:
        get_tyrant_lock().release()

    if do_commit:
        commit()
Пример #2
0
def ingest(fingerprint_list, do_commit=True, local=False):
    """ Ingest some fingerprints into the fingerprint database.
        The fingerprints should be of the form
          {"track_id": id, "fp": fp, "artist": artist, "release": release, "track": track, "length": length, "codever": "codever"}
        or a list of the same. All parameters except length must be strings. Length is an integer.
        artist, release and track are not required but highly recommended.
        length is the length of the track being ingested in seconds.
        if track_id is empty, one will be generated.
    """
    if not isinstance(fingerprint_list, list):
        fingerprint_list = [fingerprint_list]

    docs = []
    codes = []
    for fprint in fingerprint_list:
        # Validate required keys up front so nothing is half-ingested.
        if not ("track_id" in fprint and "fp" in fprint and "length" in fprint and "codever" in fprint):
            # Fixed: the closing paren was missing from this message.
            raise Exception("Missing required fingerprint parameters (track_id, fp, length, codever)")
        split_prints = split_codes(fprint)
        docs.extend(split_prints)
        # Tyrant stores (track_id, fp) pairs as UTF-8 bytes.
        codes.extend(((c["track_id"].encode("utf-8"), c["fp"].encode("utf-8")) for c in split_prints))

    if local:
        return local_ingest(docs, codes)

    with solr.pooled_connection(_fp_solr) as host:
        host.add_many(docs)

    get_tyrant().multi_set(codes)

    if do_commit:
        commit()
Пример #3
0
def ingest(fingerprint_list, do_commit=True, local=False, split=True):
    """ Ingest some fingerprints into the fingerprint database.
        The fingerprints should be of the form
          {"track_id": id,
          "fp": fp string,
          "artist": artist,
          "release": release,
          "track": track,
          "length": length,
          "codever": "codever",
          "source": source,
          "import_date":import date}
        or a list of the same. All parameters except length must be strings. Length is an integer.
        artist, release and track are not required but highly recommended.
        The import date should be formatted as an ISO 8601 date (yyyy-mm-ddThh:mm:ssZ) and should
        be the UTC time that the the import was performed. If the date is missing, the time the
        script was started will be used.
        length is the length of the track being ingested in seconds.
        if track_id is empty, one will be generated.
    """
    if not isinstance(fingerprint_list, list):
        fingerprint_list = [fingerprint_list]

    docs = []
    codes = []
    if split:
        for fprint in fingerprint_list:
            if not ("track_id" in fprint and "fp" in fprint
                    and "length" in fprint and "codever" in fprint):
                # Fixed: the closing paren was missing from this message.
                raise Exception(
                    "Missing required fingerprint parameters (track_id, fp, length, codever)"
                )
            # Fill in defaults for the optional provenance fields.
            if "import_date" not in fprint:
                fprint["import_date"] = IMPORTDATE
            if "source" not in fprint:
                fprint["source"] = "local"
            split_prints = split_codes(fprint)
            docs.extend(split_prints)
            codes.extend(
                ((c["track_id"].encode("utf-8"), c["fp"].encode("utf-8"))
                 for c in split_prints))
    else:
        # Caller guarantees the prints are already split.
        docs.extend(fingerprint_list)
        codes.extend(((c["track_id"].encode("utf-8"), c["fp"].encode("utf-8"))
                      for c in fingerprint_list))

    if local:
        return local_ingest(docs, codes)

    with solr.pooled_connection(_fp_solr) as host:
        host.add_many(docs)

    # try/finally so the lock is released even if multi_set raises
    # (the original leaked the lock on any exception).
    get_tyrant_lock().acquire()
    try:
        get_tyrant().multi_set(codes)
    finally:
        get_tyrant_lock().release()

    if do_commit:
        commit()
Пример #4
0
def check_for_fields():
    """Exit with status 1 if any solr doc lacks a field that
    util/upgrade_server.py is supposed to have added."""
    # "-field:[* TO *]" matches docs where the field is absent entirely.
    checks = [
        ("-source:[* TO *]",
         "Missing 'source' field on at least one doc. Run util/upgrade_server.py"),
        ("-import_date:[* TO *]",
         "Missing 'import_date' field on at least one doc. Run util/upgrade_server.py"),
    ]
    with solr.pooled_connection(fp._fp_solr) as host:
        for query, message in checks:
            results = host.query(query, rows=1, score=False)
            if len(results) > 0:
                print >>sys.stderr, message
                sys.exit(1)
Пример #5
0
def check_for_fields():
    """Exit with status 1 if any solr doc is missing the 'source' or
    'import_date' field added by util/upgrade_server.py."""
    with solr.pooled_connection(fp._fp_solr) as host:
        # "-field:[* TO *]" matches docs where the field is absent;
        # rows=1 because one hit is enough to prove the schema is stale.
        results = host.query("-source:[* TO *]", rows=1, score=False)
        if len(results) > 0:
            print >> sys.stderr, "Missing 'source' field on at least one doc. Run util/upgrade_server.py"
            sys.exit(1)
        results = host.query("-import_date:[* TO *]", rows=1, score=False)
        if len(results) > 0:
            print >> sys.stderr, "Missing 'import_date' field on at least one doc. Run util/upgrade_server.py"
            sys.exit(1)
Пример #6
0
def ingest(fingerprint_list, do_commit=True, local=False, split=True):
    """ Ingest some fingerprints into the fingerprint database.
        The fingerprints should be of the form
          {"track_id": id,
          "fp": fp string,
          "artist": artist,
          "release": release,
          "track": track,
          "length": length,
          "codever": "codever",
          "source": source,
          "import_date":import date}
        or a list of the same. All parameters except length must be strings. Length is an integer.
        artist, release and track are not required but highly recommended.
        The import date should be formatted as an ISO 8601 date (yyyy-mm-ddThh:mm:ssZ) and should
        be the UTC time that the the import was performed. If the date is missing, the time the
        script was started will be used.
        length is the length of the track being ingested in seconds.
        if track_id is empty, one will be generated.
    """
    if not isinstance(fingerprint_list, list):
        fingerprint_list = [fingerprint_list]

    docs = []
    codes = []
    if split:
        for fprint in fingerprint_list:
            if not ("track_id" in fprint and "fp" in fprint and "length" in fprint and "codever" in fprint):
                # Fixed: the closing paren was missing from this message.
                raise Exception("Missing required fingerprint parameters (track_id, fp, length, codever)")
            # Fill in defaults for the optional provenance fields.
            if "import_date" not in fprint:
                fprint["import_date"] = IMPORTDATE
            if "source" not in fprint:
                fprint["source"] = "local"
            split_prints = split_codes(fprint)
            docs.extend(split_prints)
            codes.extend(((c["track_id"].encode("utf-8"), c["fp"].encode("utf-8")) for c in split_prints))
    else:
        # Caller guarantees the prints are already split.
        docs.extend(fingerprint_list)
        codes.extend(((c["track_id"].encode("utf-8"), c["fp"].encode("utf-8")) for c in fingerprint_list))

    if local:
        return local_ingest(docs, codes)

    # Acquire BEFORE entering try: if acquire() itself fails we must not
    # release a lock we never held (the original acquired inside try).
    get_tyrant_lock().acquire()
    try:
        get_tyrant().multi_set(codes)
    finally:
        get_tyrant_lock().release()

    with solr.pooled_connection(_fp_solr) as host:
        host.add_many(docs)

    if do_commit:
        commit()
Пример #7
0
 def query_fp(self, code_string, rows=15, get_data=False):
     """Run a hash query against the fp flat.

     Returns the solr response object, or None if solr rejects the
     query. When get_data is true, metadata fields are included.
     """
     fields = ("track_id,artist,release,track,length"
               if get_data else "track_id")
     try:
         with solr.pooled_connection(self._fp_solr) as host:
             return host.query(code_string, qt="/hashq", rows=rows, fields=fields)
     except solr.SolrException:
         return None
Пример #8
0
    def erase_database(self, really_delete=False):
        """Wipe the ENTIRE database: every solr document and every
        tyrant key. Refuses to run unless really_delete=True is passed
        as an explicit confirmation.
        """
        if not really_delete:
            raise Exception("Won't delete unless you pass in really_delete=True")

        # Delete everything from the solr index and commit immediately.
        with solr.pooled_connection(self._fp_solr) as host:
            host.delete_query("*:*")
            host.commit()

        # Drop every key from the tyrant store as well.
        store = self.tyrant
        store.multi_del(store.keys())
Пример #9
0
    def metadata_for_track_id(self, track_id, append_end=True):
        """Return the solr metadata doc for track_id, or {} if none."""
        if not track_id or not len(track_id):
            return {}
        # Stored ids carry a "-<segment>" suffix; by default look up
        # segment 0 of the track.
        lookup = "%s-0" % track_id if append_end else track_id

        with solr.pooled_connection(self._fp_solr) as host:
            response = host.query("track_id:%s" % lookup)

        results = response.results
        return results[0] if len(results) else {}
Пример #10
0
def main():
    print "setting source to '%s', import date to %s" % (SOURCE, IMPORTDATE)
    with solr.pooled_connection(fp._fp_solr) as host:
        # Find rows where source field doesn't exist
        results = host.query("-source:[* TO *]", rows=ROWS_PER_QUERY, score=False)
        resultlen = len(results)
        while resultlen > 0:
            print "got",resultlen,"results"
            processed = process_results(results.results)
            host.add_many(processed)
            host.commit()
            results = host.query("-source:[* TO *]", rows=ROWS_PER_QUERY, score=False)
            resultlen = len(results)
        print "done"
Пример #11
0
def query_fp(code_string, rows=15, local=False, get_data=False):
    """Run a hash query against the fp flat.

    Returns the solr response object, or None on a solr error. When
    get_data is true, extra metadata fields are requested.
    """
    if local:
        return local_query_fp(code_string, rows, get_data=get_data)

    fields = ("track_id,artist,release,track,length,youtube,characters"
              if get_data else "track_id")
    try:
        with solr.pooled_connection(_fp_solr) as host:
            response = host.query(code_string, qt="/hashq", rows=rows, fields=fields)
    except solr.SolrException:
        return None
    return response
Пример #12
0
def query_fp(code_string, rows=15, local=False, get_data=False):
    """Look up code_string in the fp flat.

    Returns the solr response, or None when solr rejects the query.
    """
    if local:
        return local_query_fp(code_string, rows, get_data=get_data)

    if get_data:
        fields = "track_id,artist,release,track,length"
    else:
        fields = "track_id"

    try:
        with solr.pooled_connection(_fp_solr) as host:
            return host.query(code_string, qt="/hashq", rows=rows, fields=fields)
    except solr.SolrException:
        return None
Пример #13
0
    def delete(self, track_ids, do_commit=True):
        """Remove the given track id(s) from solr and the tyrant store."""
        track_ids = track_ids if isinstance(track_ids, list) else [track_ids]

        # Remove every solr doc whose id starts with one of the track ids.
        with solr.pooled_connection(self._fp_solr) as host:
            for track_id in track_ids:
                host.delete_query("track_id:%s*" % track_id)

        try:
            self.tyrant.multi_del(track_ids)
        except KeyError:
            # Ids absent from tyrant are ignored; removal is best-effort.
            pass

        if do_commit:
            self.commit()
Пример #14
0
def erase_database(really_delete=False, local=False):
    """Delete the ENTIRE database (solr index + tyrant store).

    Requires really_delete=True as an explicit confirmation; raises
    otherwise.
    """
    if not really_delete:
        raise Exception("Won't delete unless you pass in really_delete=True")

    if local:
        return local_erase_database()

    # Wipe every document from the solr index and commit immediately.
    with solr.pooled_connection(_fp_solr) as host:
        host.delete_query("*:*")
        host.commit()

    # Drop every key from the tyrant store as well.
    store = get_tyrant()
    store.multi_del(store.keys())
Пример #15
0
def metadata_for_track_id(track_id, local=False):
    """Fetch the metadata document for a track id; {} when not found."""
    if not track_id or not len(track_id):
        return {}
    # Stored ids carry a "-<segment>" suffix; default to segment 0.
    if "-" not in track_id:
        track_id = "%s-0" % track_id

    if local:
        return _fake_solr["metadata"][track_id]

    with solr.pooled_connection(_fp_solr) as host:
        response = host.query("track_id:%s" % track_id)

    results = response.results
    if not len(results):
        return {}
    return results[0]
Пример #16
0
def metadata_for_track_id(track_id, local=False):
    """Return the solr metadata doc for track_id, or {} if not found."""
    if not track_id or not len(track_id):
        return {}
    # Stored ids carry a "-<n>" suffix; default to the first segment.
    if "-" not in track_id:
        track_id = "%s-0" % track_id

    # The local backend is a plain dict keyed by full track id.
    if local:
        return _fake_solr["metadata"][track_id]

    with solr.pooled_connection(_fp_solr) as host:
        response = host.query("track_id:%s" % track_id)

    if len(response.results):
        return response.results[0]
    else:
        return {}
Пример #17
0
def erase_database(really_delete=False, local=False):
    """ This method will delete your ENTIRE database. Only use it if you
        know what you're doing.

        Requires really_delete=True as an explicit confirmation; raises
        otherwise.
    """
    if not really_delete:
        raise Exception("Won't delete unless you pass in really_delete=True")

    if local:
        return local_erase_database()

    with solr.pooled_connection(_fp_solr) as host:
        host.delete_query("*:*")
        host.commit()

    tyrant = get_tyrant()
    # try/finally so the lock is released even if multi_del raises
    # (the original released only on the happy path).
    get_tyrant_lock().acquire()
    try:
        tyrant.multi_del(tyrant.keys())
    finally:
        get_tyrant_lock().release()
Пример #18
0
def main():
    """Batch-update solr docs that are missing the 'source' field,
    re-querying until none remain."""
    print "setting source to '%s', import date to %s" % (SOURCE, IMPORTDATE)
    with solr.pooled_connection(fp._fp_solr) as host:
        # Find rows where source field doesn't exist
        results = host.query("-source:[* TO *]",
                             rows=ROWS_PER_QUERY,
                             score=False)
        resultlen = len(results)
        while resultlen > 0:
            print "got", resultlen, "results"
            # process_results presumably fills in the missing fields;
            # re-adding by id overwrites the stale docs. TODO confirm.
            processed = process_results(results.results)
            host.add_many(processed)
            host.commit()
            # Re-query: docs fixed above no longer match the filter, so
            # the loop terminates once every doc has a source field.
            results = host.query("-source:[* TO *]",
                                 rows=ROWS_PER_QUERY,
                                 score=False)
            resultlen = len(results)
        print "done"
Пример #19
0
def delete(track_ids, do_commit=True, local=False):
    """Delete one or more track ids from the fp flat."""
    track_ids = track_ids if isinstance(track_ids, list) else [track_ids]

    # The local (in-process) backend handles deletion itself.
    if local:
        return local_delete(track_ids)

    # Remove every solr doc whose id starts with one of the track ids.
    with solr.pooled_connection(_fp_solr) as host:
        for track_id in track_ids:
            host.delete_query("track_id:%s*" % track_id)

    try:
        get_tyrant().multi_del(track_ids)
    except KeyError:
        # Ids absent from tyrant are fine; removal is best-effort.
        pass

    if do_commit:
        commit()
Пример #20
0
def delete(track_ids, do_commit=True, local=False):
    """Delete one or more track_ids from the fp flat (solr + tyrant)."""
    # Accept a single id or a list of ids.
    if not isinstance(track_ids, list):
        track_ids = [track_ids]

    # The local (in-process) backend handles deletion itself.
    if local:
        return local_delete(track_ids)

    # Wildcard query removes every solr doc whose id starts with the
    # track id (ids are stored with a "-<segment>" suffix).
    with solr.pooled_connection(_fp_solr) as host:
        for t in track_ids:
            host.delete_query("track_id:%s*" % t)

    try:
        get_tyrant().multi_del(track_ids)
    except KeyError:
        # Ids absent from tyrant are ignored; removal is best-effort.
        pass

    if do_commit:
        commit()
Пример #21
0
def dump(start=0):
    try:
        lastdump = tyrant["lastdump"]
    except KeyError:
        lastdump = "*"

    filecount = 1
    itemcount = 1
    filename = FILENAME_TEMPLATE % (now, filecount)
    writer = csv.writer(open(filename, "w"))
    with solr.pooled_connection(fp._fp_solr) as host:
        items_to_dump = host.query("import_date:[%s TO %s]" % (lastdump, now),
                                   rows=10000,
                                   start=start)
        print "going to dump %s entries" % items_to_dump.results.numFound
        resultlen = len(items_to_dump)
        while resultlen > 0:
            print "writing %d results from start=%s" % (
                resultlen, items_to_dump.results.start)
            for r in items_to_dump.results:
                row = [
                    r["track_id"], r["codever"], tyrant[str(r["track_id"])],
                    r["length"],
                    r.get("artist", ""),
                    r.get("release", ""),
                    r.get("track", "")
                ]
                writer.writerow(row)
            itemcount += resultlen
            if itemcount > ITEMS_PER_FILE:
                filecount += 1
                filename = FILENAME_TEMPLATE % (now, filecount)
                print "Making new file, %s" % filename
                writer = csv.writer(open(filename, "w"))
                itemcount = resultlen
            items_to_dump = items_to_dump.next_batch()
            resultlen = len(items_to_dump)

    # Write the final completion time
    tyrant["lastdump"] = now
def dump(start=0):
    try:
#        lastdump = tyrant["lastdump"]
	 lastdump = "*"
    except KeyError:
        lastdump = "*"

    filecount = 1
    itemcount = 1
    filename = FILENAME_TEMPLATE % (now, filecount)
    writer = csv.writer(open(filename, "w"))
    with solr.pooled_connection(fp._fp_solr) as host:
        items_to_dump = host.query("import_date:[%s TO %s]" % (lastdump, now), rows=10000, start=start)
        print "going to dump %s entries" % items_to_dump.results.numFound
        resultlen = len(items_to_dump)
        while resultlen > 0:
            print "writing %d results from start=%s" % (resultlen, items_to_dump.results.start)
            for r in items_to_dump.results:
                row = [r["track_id"],
                       r["codever"],
                       tyrant[str(r["track_id"])],
                       r["length"],
                       r.get("artist", ""),
                       r.get("release", ""),
                       r.get("track", "")
                      ]
                writer.writerow(row)
            itemcount += resultlen
            if itemcount > ITEMS_PER_FILE:
                filecount += 1
                filename = FILENAME_TEMPLATE % (now, filecount)
                print "Making new file, %s" % filename
                writer = csv.writer(open(filename, "w"))
                itemcount = resultlen
            items_to_dump = items_to_dump.next_batch()
            resultlen = len(items_to_dump)

    # Write the final completion time
    tyrant["lastdump"] = now
Пример #23
0
 def commit(self):
     """Commit pending changes on this instance's fp flat solr index."""
     pooled = solr.pooled_connection(self._fp_solr)
     with pooled as host:
         host.commit()
Пример #24
0
def commit(local=False):
    """Commit pending changes on the fp flat solr index.

    NOTE(review): the `local` flag is accepted but unused here —
    presumably the local backend needs no commit step; confirm.
    """
    with solr.pooled_connection(_fp_solr) as host:
        host.commit()
Пример #25
0
def commit(local=False):
    """Commit any pending changes to the fp flat solr index.

    The `local` flag is accepted for interface symmetry but not used
    in this code path.
    """
    connection = solr.pooled_connection(_fp_solr)
    with connection as host:
        host.commit()