Exemplo n.º 1
0
def find_track_duplicates(conn, fingerprint, index=None):
    """
    Find tracks duplicating the track of ``fingerprint`` and merge them.

    The fingerprint is searched for with ``const.TRACK_MERGE_THRESHOLD`` as
    the minimum score.  Each distinct matched track accepted by
    ``can_add_fp_to_track`` becomes a merge candidate.  When there is more
    than one candidate, the first group from ``can_merge_tracks`` holding
    more than one track is merged into its lowest track ID.  Finally the
    fingerprint ID is inserted into ``fingerprint_deduplicate``.  All of this
    happens inside the ``conn.begin()`` block.

    If the search returns nothing, the function returns early and the
    fingerprint is not recorded in ``fingerprint_deduplicate``.

    :param conn: connection providing ``begin()`` and ``execute()``
    :param fingerprint: mapping with the keys ``id``, ``fingerprint`` and
        ``length``
    :param index: handed to ``FingerprintSearcher`` unchanged
    :return: ``None``
    """
    with conn.begin():
        searcher = FingerprintSearcher(conn, index)
        searcher.min_score = const.TRACK_MERGE_THRESHOLD
        matches = searcher.search(fingerprint['fingerprint'], fingerprint['length'])
        if not matches:
            # The fingerprint is expected to at least match itself.
            logger.debug("Not matched itself!")
            return
        logged = False
        all_track_ids = set()
        possible_track_ids = set()
        for m in matches:
            # Evaluate every track only once, using its first match.
            if m['track_id'] in all_track_ids:
                continue
            all_track_ids.add(m['track_id'])
            if can_add_fp_to_track(conn, m['track_id'], fingerprint['fingerprint'], fingerprint['length']):
                # Matches of the fingerprint with itself are not logged.
                if m['id'] != fingerprint['id']:
                    if not logged:
                        logger.debug("Deduplicating fingerprint %d", fingerprint['id'])
                        logged = True
                    logger.debug("Fingerprint %d with track %d is %d%% similar", m['id'], m['track_id'], m['score'] * 100)
                possible_track_ids.add(m['track_id'])
        if len(possible_track_ids) > 1:
            for group in can_merge_tracks(conn, possible_track_ids):
                if len(group) > 1:
                    # The lowest track ID survives; the rest are merged into it.
                    target_track_id = min(group)
                    group.remove(target_track_id)
                    merge_tracks(conn, target_track_id, list(group))
                    # Only the first mergeable group is handled per call.
                    break
        conn.execute("INSERT INTO fingerprint_deduplicate (id) VALUES (%s)", fingerprint['id'])
Exemplo n.º 2
0
 def _handle_internal(self, params):
     """
     Run the lookup described by ``params`` and build the response dict.

     In batch mode every entry of ``params.fingerprints`` is looked up and
     the response holds a ``fingerprints`` list with one
     ``{'index', 'results'}`` item per query.  Otherwise only the first
     entry is looked up and the response holds a single ``results`` list.
     A query with a ``track_gid`` is resolved directly instead of being
     searched by fingerprint.  Metadata is injected when ``params.meta`` is
     set and at least one result was collected.
     """
     import time
     started = time.time()
     searcher = FingerprintSearcher(self.conn, self.index)
     queries = params.fingerprints if params.batch else params.fingerprints[:1]

     # Collect the matches for every query first, in query order.
     matches_per_query = []
     for query in queries:
         gid = query['track_gid']
         if gid:
             # Direct track lookup: fabricate one perfect-score match.
             found = [(0, resolve_track_gid(self.conn, gid), gid, 1.0)]
         else:
             found = searcher.search(query['fingerprint'], query['duration'])
         matches_per_query.append(found)

     response = {}
     result_map = {}
     if params.batch:
         fps = []
         response['fingerprints'] = fps
         for query, found in zip(queries, matches_per_query):
             results = []
             fps.append({'index': query['index'], 'results': results})
             track_ids = self._inject_results(results, result_map, found)
             logger.info("Lookup from %s: %s", params.application_id, list(track_ids))
     else:
         results = []
         response['results'] = results
         self._inject_results(results, result_map, matches_per_query[0])
         logger.info("Lookup from %s: %s", params.application_id, result_map.keys())

     if params.meta and result_map:
         self.inject_metadata(params.meta, result_map)
     logger.info("Lookup took %s", time.time() - started)
     return response
Exemplo n.º 3
0
def main(script, opts, args):
    conn = script.engine.connect()
    min_id, max_id = conn.execute("SELECT min(id), max(id) FROM fingerprint").fetchone()
    print min_id, max_id
    while True:
        id = random.randint(min_id, max_id)
        row = conn.execute("SELECT fingerprint, length FROM fingerprint WHERE id = %s", (id,)).fetchone()
        if row is None:
            continue
        fingerprint, length = row
        for i in range(1):
            t0 = time.time()
            for i in range(len(fingerprint)):
                fingerprint[i] ^= random.getrandbits(2) << random.randint(0, 20)
            searcher = FingerprintSearcher(script.engine, script.index)
            matches = searcher.search(fingerprint, length + random.randint(-8, 8))
            track_ids = [r[1] for r in matches]
            track_mbid_map = lookup_mbids(conn, track_ids)
            mbids = set()
            for track_id, track_mbids in track_mbid_map.iteritems():
                for mbid, sources in track_mbids:
                    mbids.add(mbid)
            metadata = lookup_metadata(conn, mbids, load_releases=True, load_release_groups=True)
            print "Searching for ID", id, len(matches), time.time() - t0
        time.sleep(1)
Exemplo n.º 4
0
def find_track_duplicates(conn, fingerprint, index=None):
    """
    Merge tracks that the given fingerprint could equally belong to.

    Searches for ``fingerprint`` using ``const.TRACK_MERGE_THRESHOLD`` as
    the minimum score, collects every distinct matched track accepted by
    ``can_add_fp_to_track`` and, if there are several, merges the first
    multi-track group reported by ``can_merge_tracks`` into its lowest
    track ID.  The fingerprint ID is then inserted into
    ``fingerprint_deduplicate``.  Nothing is inserted when the search has
    no matches.
    """
    with conn.begin():
        fp_data = fingerprint['fingerprint']
        fp_length = fingerprint['length']

        searcher = FingerprintSearcher(conn, index)
        searcher.min_score = const.TRACK_MERGE_THRESHOLD
        matches = searcher.search(fp_data, fp_length)
        if not matches:
            logger.debug("Not matched itself!")
            return

        announced = False
        seen_track_ids = set()
        candidate_track_ids = set()
        for hit in matches:
            track_id = hit['track_id']
            # Each track is judged once, by its first match.
            if track_id in seen_track_ids:
                continue
            seen_track_ids.add(track_id)
            if not can_add_fp_to_track(conn, track_id, fp_data, fp_length):
                continue
            candidate_track_ids.add(track_id)
            # The fingerprint matching itself is not worth logging.
            if hit['id'] == fingerprint['id']:
                continue
            if not announced:
                logger.debug("Deduplicating fingerprint %d", fingerprint['id'])
                announced = True
            logger.debug("Fingerprint %d with track %d is %d%% similar", hit['id'], track_id, hit['score'] * 100)

        if len(candidate_track_ids) > 1:
            for group in can_merge_tracks(conn, candidate_track_ids):
                if len(group) <= 1:
                    continue
                # Keep the lowest ID and fold the other tracks into it.
                survivor = min(group)
                group.remove(survivor)
                merge_tracks(conn, survivor, list(group))
                break

        conn.execute("INSERT INTO fingerprint_deduplicate (id) VALUES (%s)", fingerprint['id'])
Exemplo n.º 5
0
def main(script, opts, args):
    conn = script.engine.connect()
    min_id, max_id = conn.execute(
        "SELECT min(id), max(id) FROM fingerprint").fetchone()
    print min_id, max_id
    while True:
        id = random.randint(min_id, max_id)
        row = conn.execute(
            "SELECT fingerprint, length FROM fingerprint WHERE id = %s",
            (id, )).fetchone()
        if row is None:
            continue
        fingerprint, length = row
        for i in range(1):
            t0 = time.time()
            for i in range(len(fingerprint)):
                fingerprint[i] ^= random.getrandbits(2) << random.randint(
                    0, 20)
            searcher = FingerprintSearcher(script.engine, script.index)
            matches = searcher.search(fingerprint,
                                      length + random.randint(-8, 8))
            track_ids = [r[1] for r in matches]
            track_mbid_map = lookup_mbids(conn, track_ids)
            mbids = set()
            for track_id, track_mbids in track_mbid_map.iteritems():
                for mbid, sources in track_mbids:
                    mbids.add(mbid)
            metadata = lookup_metadata(conn,
                                       mbids,
                                       load_releases=True,
                                       load_release_groups=True)
            print "Searching for ID", id, len(matches), time.time() - t0
        time.sleep(1)
def main(script, opts, args):
    """
    Open a connection, run ``find_duplicates`` on it, then search for a
    fingerprint and pick the ``track_gid`` of the first match.

    NOTE(review): ``index`` and ``fingerprint`` are not defined anywhere in
    this function, so the code after the ``find_duplicates`` call looks like
    the tail of a different function that was pasted here -- confirm against
    the original sources before relying on it.
    """
    conn = script.engine.connect()
    find_duplicates(conn, index=script.index)
    # NOTE(review): `index` is not a local name here (only `script.index` is
    # visible) -- presumably a leftover from another function.
    searcher = FingerprintSearcher(conn, index)
    # NOTE(review): `fingerprint` is likewise undefined in this scope.
    matches = searcher.search(fingerprint['fingerprint'], fingerprint['length'])
    # Take the track GID of the first match, or keep None without matches.
    track_gid = None
    for m in matches:
        track_gid = m['track_gid']
        break
Exemplo n.º 7
0
 def _handle_internal(self, params):
     """
     Perform the lookup described by ``params`` and return the response.

     The user-agent counter is updated first.  In batch mode all of
     ``params.fingerprints`` are processed and reported under the
     ``fingerprints`` key; otherwise only the first one is processed and
     reported under ``results``.  Queries carrying a ``track_gid`` are
     resolved directly, the others are searched by fingerprint with
     ``params.max_duration_diff`` as the allowed length difference.  The
     lookup counter and the average time per fingerprint are updated
     along the way.
     """
     import time
     started = time.time()
     update_user_agent_counter(self.redis, params.application_id,
                               self.user_agent, self.user_ip)

     searcher = FingerprintSearcher(self.conn, self.index)
     searcher.max_length_diff = params.max_duration_diff

     queries = params.fingerprints if params.batch else params.fingerprints[:1]

     # First pass: gather the matches of every query, in order.
     matches_per_query = []
     for query in queries:
         gid = query['track_gid']
         if gid:
             # Direct track lookup is reported as one perfect match.
             found = [(0, resolve_track_gid(self.conn, gid), gid, 1.0)]
         else:
             found = searcher.search(query['fingerprint'], query['duration'])
         matches_per_query.append(found)

     # Second pass: turn the matches into the response structure.
     response = {}
     result_map = {}
     if params.batch:
         fps = []
         response['fingerprints'] = fps
         for query, found in zip(queries, matches_per_query):
             results = []
             fps.append({'index': query['index'], 'results': results})
             track_ids = self._inject_results(results, result_map, found)
             update_lookup_counter(self.redis, params.application_id,
                                   bool(track_ids))
             logger.debug("Lookup from %s: %s", params.application_id,
                          list(track_ids))
     else:
         results = []
         response['results'] = results
         self._inject_results(results, result_map, matches_per_query[0])
         update_lookup_counter(self.redis, params.application_id,
                               bool(result_map))
         logger.debug("Lookup from %s: %s", params.application_id,
                      result_map.keys())

     if params.meta and result_map:
         self.inject_metadata(params.meta, result_map)

     if queries:
         elapsed = time.time() - started
         update_lookup_avg_time(self.redis, elapsed / len(queries))
     return response
Exemplo n.º 8
0
 def _handle_internal(self, params):
     """
     Look up the fingerprints in ``params`` and build the response dict.

     Batch requests get a ``fingerprints`` list with one entry per query;
     other requests get a ``results`` list for the first query only.  A
     query with a ``track_gid`` bypasses the fingerprint search.  Usage
     statistics (user agent, lookup hit/miss, average time per
     fingerprint) are pushed through the ``update_*`` helpers.
     """
     import time
     t_start = time.time()
     update_user_agent_counter(self.redis, params.application_id, self.user_agent, self.user_ip)
     searcher = FingerprintSearcher(self.conn, self.index)
     searcher.max_length_diff = params.max_duration_diff

     def find_matches(query):
         # A known track GID is reported as a single perfect match.
         if query['track_gid']:
             track_id = resolve_track_gid(self.conn, query['track_gid'])
             return [(0, track_id, query['track_gid'], 1.0)]
         return searcher.search(query['fingerprint'], query['duration'])

     batch_mode = params.batch
     if batch_mode:
         fingerprints = params.fingerprints
     else:
         fingerprints = params.fingerprints[:1]
     all_matches = [find_matches(query) for query in fingerprints]

     response = {}
     result_map = {}
     if params.batch:
         per_fingerprint = response['fingerprints'] = []
         for query, matches in zip(fingerprints, all_matches):
             results = []
             per_fingerprint.append({'index': query['index'], 'results': results})
             track_ids = self._inject_results(results, result_map, matches)
             update_lookup_counter(self.redis, params.application_id, bool(track_ids))
             logger.debug("Lookup from %s: %s", params.application_id, list(track_ids))
     else:
         results = response['results'] = []
         self._inject_results(results, result_map, all_matches[0])
         update_lookup_counter(self.redis, params.application_id, bool(result_map))
         logger.debug("Lookup from %s: %s", params.application_id, result_map.keys())

     if params.meta and result_map:
         self.inject_metadata(params.meta, result_map)
     if fingerprints:
         total_time = time.time() - t_start
         update_lookup_avg_time(self.redis, total_time / len(fingerprints))
     return response
Exemplo n.º 9
0
def import_submission(ingest_db, app_db, fingerprint_db, index_pool,
                      submission):
    # type: (IngestDB, AppDB, FingerprintDB, IndexClientPool, Dict[str, Any]) -> Optional[Dict[str, Any]]
    """
    Import the given submission into the main fingerprint database.

    The submission is first marked as handled.  It is then skipped (``None``
    is returned) when its fingerprint has fewer than
    ``const.FINGERPRINT_MIN_UNIQUE_ITEMS`` unique items, when no query items
    can be extracted from it, or when its ``source_id`` does not resolve to
    a source.  Otherwise the fingerprint is matched against existing
    fingerprints to decide which track and fingerprint row it belongs to
    (merging tracks where ``can_merge_tracks`` allows it), missing rows are
    inserted, the MBID / PUID / meta / foreign ID links are stored and a
    submission result is recorded.

    :param ingest_db: connection used for the submission table, the
        submission result and passed on to the insert helpers
    :param app_db: connection used for sources and formats
    :param fingerprint_db: connection used for fingerprints, tracks and
        their links
    :param index_pool: passed to ``FingerprintSearcher``
    :param submission: mapping describing the submission
    :return: dict with the keys ``id``, ``track_id``, ``fingerprint``,
        ``length``, ``bitrate`` and ``format_id``, or ``None`` when the
        submission was skipped
    """

    handled_at = datetime.datetime.now(pytz.utc)

    # Mark the submission as handled up front; a single UPDATE sets both
    # the flag and the timestamp.
    update_stmt = schema.submission.update().where(
        schema.submission.c.id == submission['id'])
    ingest_db.execute(update_stmt.values(handled=True, handled_at=handled_at))
    logger.info("Importing submission %d with MBIDs %s", submission['id'],
                submission['mbid'])

    num_unique_items = len(set(submission['fingerprint']))
    if num_unique_items < const.FINGERPRINT_MIN_UNIQUE_ITEMS:
        logger.info("Skipping, has only %d unique items", num_unique_items)
        return None

    # Fetch the actual count; the bare result object would always be truthy
    # and the check below could never trigger.
    num_query_items = fingerprint_db.execute(
        "SELECT icount(acoustid_extract_query(%(fp)s))",
        dict(fp=submission['fingerprint'])).scalar()
    if not num_query_items:
        logger.info("Skipping, no data to index")
        return None

    # Resolve the source, or create one from the submitted application and
    # account details.
    source_id = submission['source_id']
    if source_id is not None:
        source = get_source(app_db, source_id)
        if source is None:
            logger.error("Source not found")
            return None
    else:
        source = {
            'application_id': submission['application_id'],
            'version': submission['application_version'],
            'account_id': submission['account_id'],
        }
        source_id = find_or_insert_source(app_db, source['application_id'],
                                          source['account_id'],
                                          source['version'])

    submission_result = {
        'submission_id': submission['id'],
        'created': submission['created'],
        'handled_at': handled_at,
        'account_id': source['account_id'],
        'application_id': source['application_id'],
        'application_version': source['version'],
    }

    format_id = submission['format_id']
    if format_id is None and submission['format'] is not None:
        format_id = find_or_insert_format(app_db, submission['format'])

    fingerprint = {
        'id': None,
        'track_id': None,
        'fingerprint': submission['fingerprint'],
        'length': submission['length'],
        'bitrate': submission['bitrate'],
        'format_id': format_id,
    }

    searcher = FingerprintSearcher(fingerprint_db, index_pool, fast=False)
    searcher.min_score = const.TRACK_MERGE_THRESHOLD
    matches = searcher.search(submission['fingerprint'], submission['length'])
    if matches:
        all_track_ids = set()  # type: Set[int]
        possible_track_ids = set()  # type: Set[int]
        for m in matches:
            # Look at every track only once, through its first match.
            if m.track_id in all_track_ids:
                continue
            all_track_ids.add(m.track_id)
            logger.debug("Fingerprint %d with track %d is %d%% similar",
                         m.fingerprint_id, m.track_id, m.score * 100)
            if can_add_fp_to_track(fingerprint_db, m.track_id,
                                   submission['fingerprint'],
                                   submission['length']):
                possible_track_ids.add(m.track_id)
                # The first acceptable track wins; the matched fingerprint
                # row is reused only above the fingerprint merge threshold.
                if not fingerprint['track_id']:
                    fingerprint['track_id'] = m.track_id
                    if m.score > const.FINGERPRINT_MERGE_THRESHOLD:
                        fingerprint['id'] = m.fingerprint_id
        if len(possible_track_ids) > 1:
            for group in can_merge_tracks(fingerprint_db, possible_track_ids):
                if fingerprint['track_id'] in group and len(group) > 1:
                    # Merge the group into its lowest track ID and attach
                    # the fingerprint to that track.
                    fingerprint['track_id'] = min(group)
                    group.remove(fingerprint['track_id'])
                    merge_tracks(fingerprint_db, ingest_db,
                                 fingerprint['track_id'], list(group))
                    break

    if not fingerprint['track_id']:
        fingerprint['track_id'] = insert_track(fingerprint_db)

    assert isinstance(fingerprint['track_id'], int)
    submission_result['track_id'] = fingerprint['track_id']

    if not fingerprint['id']:
        fingerprint['id'] = insert_fingerprint(fingerprint_db, ingest_db,
                                               fingerprint, submission['id'],
                                               source_id)
    else:
        assert isinstance(fingerprint['id'], int)
        inc_fingerprint_submission_count(fingerprint_db, ingest_db,
                                         fingerprint['id'], submission['id'],
                                         source_id)

    submission_result['fingerprint_id'] = fingerprint['id']

    # The all-zero UUID is treated as "no MBID" / "no PUID".
    if submission['mbid'] and submission[
            'mbid'] != '00000000-0000-0000-0000-000000000000':
        insert_mbid(fingerprint_db, ingest_db, fingerprint['track_id'],
                    submission['mbid'], submission['id'], source_id)
        submission_result['mbid'] = submission['mbid']

    if submission['puid'] and submission[
            'puid'] != '00000000-0000-0000-0000-000000000000':
        insert_puid(fingerprint_db, ingest_db, fingerprint['track_id'],
                    submission['puid'], submission['id'], source_id)
        submission_result['puid'] = submission['puid']

    if submission['meta_id'] or submission['meta']:
        meta_id = submission['meta_id']  # type: Optional[int]
        meta_gid = None  # type: Optional[uuid.UUID]
        if meta_id is None:
            meta_id, meta_gid = find_or_insert_meta(fingerprint_db,
                                                    submission['meta'])
        else:
            found, meta_gid = check_meta_id(fingerprint_db, meta_id)
            if not found:
                # An unknown meta ID is logged and the link is skipped.
                logger.error("Meta not found")
                meta_id = None
        if meta_id is not None:
            insert_track_meta(fingerprint_db, ingest_db,
                              fingerprint['track_id'], meta_id,
                              submission['id'], source_id)
            submission_result['meta_id'] = meta_id
            submission_result['meta_gid'] = meta_gid

    if submission['foreignid_id'] or submission['foreignid']:
        foreignid_id = submission['foreignid_id']
        if foreignid_id is None:
            foreignid = submission['foreignid']
            foreignid_id = find_or_insert_foreignid(fingerprint_db, foreignid)
        else:
            foreignid = get_foreignid(fingerprint_db, foreignid_id)
        insert_track_foreignid(fingerprint_db, ingest_db,
                               fingerprint['track_id'], foreignid_id,
                               submission['id'], source_id)
        submission_result['foreignid'] = foreignid

    insert_submission_result(ingest_db, submission_result)

    return fingerprint
Exemplo n.º 10
0
    def _handle_internal(self, params):
        # type: (APIHandlerParams) -> Dict[str, Any]
        """
        Run the lookup described by ``params`` and return the response dict.

        In batch mode all of ``params.fingerprints`` are processed and the
        response has a ``fingerprints`` list with one
        ``{'index', 'results'}`` entry per query; otherwise only the first
        query is processed and the response has a ``results`` list.
        ``TrackLookupQuery`` entries are resolved by track GID,
        ``FingerprintLookupQuery`` entries are searched by fingerprint, and
        any other entry yields no matches.  Metadata is injected when
        ``params.meta`` is set and there is at least one result.
        """
        assert isinstance(params, LookupHandlerParams)

        import time
        t = time.time()

        update_user_agent_counter(self.ctx.redis, params.application_id,
                                  str(self.user_agent), self.user_ip)

        searcher = FingerprintSearcher(
            self.ctx.db.get_fingerprint_db(read_only=True), self.ctx.index)
        assert params.max_duration_diff is not None
        searcher.max_length_diff = params.max_duration_diff

        if params.batch:
            fingerprints = params.fingerprints
        else:
            fingerprints = params.fingerprints[:1]

        all_matches = []
        for p in fingerprints:
            if isinstance(p, TrackLookupQuery):
                track_id = resolve_track_gid(
                    self.ctx.db.get_fingerprint_db(read_only=True),
                    p.track_gid)
                if track_id:
                    # A resolved track GID counts as one perfect match.
                    matches = [
                        FingerprintMatch(fingerprint_id=0,
                                         track_id=track_id,
                                         track_gid=p.track_gid,
                                         score=1.0)
                    ]
                else:
                    matches = []
            elif isinstance(p, FingerprintLookupQuery):
                matches = searcher.search(p.fingerprint, p.duration)
            else:
                # Without this branch an unexpected query type would leave
                # `matches` unbound, or silently reuse the matches of the
                # previous query.
                matches = []
            all_matches.append(matches)

        response = {}  # type: Dict[str, Any]
        if params.batch:
            response['fingerprints'] = fps = []
            result_map = {}  # type: ignore
            for p, matches in zip(fingerprints, all_matches):
                results = []  # type: ignore
                fps.append({'index': p.index, 'results': results})
                track_ids = self._inject_results(results, result_map, matches)
                update_lookup_counter(self.ctx.redis, params.application_id,
                                      bool(track_ids))
                logger.debug("Lookup from %s: %s", params.application_id,
                             list(track_ids))
        else:
            response['results'] = results = []
            result_map = {}
            self._inject_results(results, result_map, all_matches[0])
            update_lookup_counter(self.ctx.redis, params.application_id,
                                  bool(result_map))
            logger.debug("Lookup from %s: %s", params.application_id,
                         result_map.keys())

        if params.meta and result_map:
            self.inject_metadata(params.meta, result_map)

        if fingerprints:
            # Report the average time spent per looked-up fingerprint.
            time_per_fp = (time.time() - t) / len(fingerprints)
            update_lookup_avg_time(self.ctx.redis, time_per_fp)

        return response
Exemplo n.º 11
0
    def _handle_internal(self, params):
        # type: (APIHandlerParams) -> Dict[str, Any]
        """
        Run the lookup described by ``params`` and return the response dict.

        In batch mode all of ``params.fingerprints`` are processed and the
        response has a ``fingerprints`` list with one
        ``{'index', 'results'}`` entry per query; otherwise only the first
        query is processed and the response has a ``results`` list.
        ``TrackLookupQuery`` entries are resolved by track GID,
        ``FingerprintLookupQuery`` entries are searched by fingerprint (at
        most 10 results each), and any other entry yields no matches.
        Metadata is injected only when the website configuration enables it,
        ``params.meta`` is set and there is at least one result.  When a
        statsd client is configured, counters and timings are collected in
        a pipeline and sent at the end.
        """
        assert isinstance(params, LookupHandlerParams)

        import time
        t = time.time()

        if self.ctx.statsd is not None:
            statsd = self.ctx.statsd.pipeline()
        else:
            statsd = None

        update_user_agent_counter(self.ctx.redis, params.application_id,
                                  str(self.user_agent), self.user_ip)

        searcher = FingerprintSearcher(
            self.ctx.db.get_fingerprint_db(read_only=True),
            self.ctx.index,
            timeout=self.ctx.config.website.search_timeout,
        )
        assert params.max_duration_diff is not None
        searcher.max_length_diff = params.max_duration_diff

        if params.batch:
            fingerprints = params.fingerprints
        else:
            fingerprints = params.fingerprints[:1]

        max_results = 10

        all_matches = []
        for p in fingerprints:
            if isinstance(p, TrackLookupQuery):
                track_id = resolve_track_gid(
                    self.ctx.db.get_fingerprint_db(read_only=True),
                    p.track_gid)
                if track_id:
                    # A resolved track GID counts as one perfect match.
                    matches = [
                        FingerprintMatch(fingerprint_id=0,
                                         track_id=track_id,
                                         track_gid=p.track_gid,
                                         score=1.0)
                    ]
                else:
                    matches = []
            elif isinstance(p, FingerprintLookupQuery):
                fingerprint_search_t0 = time.time()
                matches = searcher.search(p.fingerprint,
                                          p.duration,
                                          max_results=max_results)
                fingerprint_search_t1 = time.time()
                if statsd is not None:
                    statsd.incr('api.lookup.matches', len(matches))
                    statsd.timing(
                        'api.lookup.fingerprint_search',
                        fingerprint_search_t1 - fingerprint_search_t0)
            else:
                # Without this branch an unexpected query type would leave
                # `matches` unbound, or silently reuse the matches of the
                # previous query.
                matches = []
            all_matches.append(matches)

        # All searches are done; close the database session before the
        # response is assembled.
        self.ctx.db.session.close()

        response = {}  # type: Dict[str, Any]
        if params.batch:
            response['fingerprints'] = fps = []
            result_map = {}  # type: ignore
            for p, matches in zip(fingerprints, all_matches):
                results = []  # type: ignore
                fps.append({'index': p.index, 'results': results})
                track_ids = self._inject_results(results, result_map, matches)
                update_lookup_counter(self.ctx.redis, params.application_id,
                                      bool(track_ids))
                logger.debug("Lookup from %s: %s", params.application_id,
                             list(track_ids))
        else:
            response['results'] = results = []
            result_map = {}
            self._inject_results(results, result_map, all_matches[0])
            update_lookup_counter(self.ctx.redis, params.application_id,
                                  bool(result_map))
            logger.debug("Lookup from %s: %s", params.application_id,
                         result_map.keys())

        if self.ctx.config.website.search_return_metadata:
            if params.meta and result_map:
                inject_metadata_t0 = time.time()
                self.inject_metadata(params.meta, result_map)
                inject_metadata_t1 = time.time()
                if statsd is not None:
                    statsd.timing('api.lookup.inject_metadata',
                                  inject_metadata_t1 - inject_metadata_t0)

        if fingerprints:
            # Report both the total time and the average per fingerprint.
            time_total = (time.time() - t)
            time_per_fp = time_total / len(fingerprints)
            update_lookup_avg_time(self.ctx.redis, time_per_fp)
            if statsd is not None:
                statsd.timing('api.lookup.total', time_total)

        if statsd is not None:
            statsd.send()

        return response
Exemplo n.º 12
0
def import_submission(conn, submission, index=None):
    """
    Import the given submission into the main fingerprint database.

    Inside the ``conn.begin()`` block the submission is marked as handled
    and its MBIDs are collected (the submitted MBID after redirect
    resolution, plus MBIDs found through the PUID within 15 units of the
    submitted length).  The submission is skipped when its fingerprint has
    fewer than ``const.FINGERPRINT_MIN_UNIQUE_ITEMS`` unique items or when
    no query items can be extracted from it.  Otherwise the fingerprint is
    matched against existing ones to choose a track and fingerprint row
    (merging tracks where ``can_merge_tracks`` allows it), missing rows are
    inserted and the MBID / PUID / meta / foreign ID links are stored.

    :param conn: connection providing ``begin()`` and ``execute()``
    :param submission: mapping describing the submission
    :param index: passed to ``FingerprintSearcher``
    :return: dict with the keys ``id``, ``track_id``, ``fingerprint``,
        ``length``, ``bitrate`` and ``format_id``, or ``None`` when the
        submission was skipped
    """
    with conn.begin():
        update_stmt = schema.submission.update().where(
            schema.submission.c.id == submission['id'])
        conn.execute(update_stmt.values(handled=True))
        mbids = []
        if submission['mbid']:
            mbids.append(resolve_mbid_redirect(conn, submission['mbid']))
        if submission['puid']:
            min_duration = submission['length'] - 15
            max_duration = submission['length'] + 15
            mbids.extend(find_puid_mbids(conn, submission['puid'], min_duration, max_duration))
        logger.info("Importing submission %d with MBIDs %s",
            submission['id'], ', '.join(mbids))
        num_unique_items = len(set(submission['fingerprint']))
        if num_unique_items < const.FINGERPRINT_MIN_UNIQUE_ITEMS:
            logger.info("Skipping, has only %d unique items", num_unique_items)
            return
        # Fetch the actual count; the bare result object would always be
        # truthy and the check below could never trigger.
        num_query_items = conn.execute("SELECT icount(acoustid_extract_query(%(fp)s))", dict(fp=submission['fingerprint'])).scalar()
        if not num_query_items:
            logger.info("Skipping, no data to index")
            return
        searcher = FingerprintSearcher(conn, index, fast=False)
        searcher.min_score = const.TRACK_MERGE_THRESHOLD
        matches = searcher.search(submission['fingerprint'], submission['length'])
        fingerprint = {
            'id': None,
            'track_id': None,
            'fingerprint': submission['fingerprint'],
            'length': submission['length'],
            'bitrate': submission['bitrate'],
            'format_id': submission['format_id'],
        }
        if matches:
            all_track_ids = set()
            possible_track_ids = set()
            for m in matches:
                # Look at every track only once, through its first match.
                if m['track_id'] in all_track_ids:
                    continue
                all_track_ids.add(m['track_id'])
                logger.debug("Fingerprint %d with track %d is %d%% similar", m['id'], m['track_id'], m['score'] * 100)
                if can_add_fp_to_track(conn, m['track_id'], submission['fingerprint'], submission['length']):
                    possible_track_ids.add(m['track_id'])
                    # The first acceptable track wins; the matched
                    # fingerprint row is reused only above the fingerprint
                    # merge threshold.
                    if not fingerprint['track_id']:
                        fingerprint['track_id'] = m['track_id']
                        if m['score'] > const.FINGERPRINT_MERGE_THRESHOLD:
                            fingerprint['id'] = m['id']
            if len(possible_track_ids) > 1:
                for group in can_merge_tracks(conn, possible_track_ids):
                    if fingerprint['track_id'] in group and len(group) > 1:
                        # Merge the group into its lowest track ID.
                        fingerprint['track_id'] = min(group)
                        group.remove(fingerprint['track_id'])
                        merge_tracks(conn, fingerprint['track_id'], list(group))
                        break
        if not fingerprint['track_id']:
            fingerprint['track_id'] = insert_track(conn)
        if not fingerprint['id']:
            fingerprint['id'] = insert_fingerprint(conn, fingerprint, submission['id'], submission['source_id'])
        else:
            inc_fingerprint_submission_count(conn, fingerprint['id'], submission['id'], submission['source_id'])
        for mbid in mbids:
            insert_mbid(conn, fingerprint['track_id'], mbid, submission['id'], submission['source_id'])
        # The all-zero UUID is treated as "no PUID".
        if submission['puid'] and submission['puid'] != '00000000-0000-0000-0000-000000000000':
            insert_puid(conn, fingerprint['track_id'], submission['puid'], submission['id'], submission['source_id'])
        if submission['meta_id']:
            insert_track_meta(conn, fingerprint['track_id'], submission['meta_id'], submission['id'], submission['source_id'])
        if submission['foreignid_id']:
            insert_track_foreignid(conn, fingerprint['track_id'], submission['foreignid_id'], submission['id'], submission['source_id'])
        return fingerprint
Exemplo n.º 13
0
def import_submission(conn, submission, index=None):
    """
    Import the given submission into the main fingerprint database.

    Inside the ``conn.begin()`` block the submission is marked as handled
    and its MBID, if any, is passed through ``resolve_mbid_redirect``.  The
    submission is skipped when its fingerprint has fewer than
    ``const.FINGERPRINT_MIN_UNIQUE_ITEMS`` unique items or when no query
    items can be extracted from it.  Otherwise the fingerprint is matched
    against existing ones to choose a track and fingerprint row (merging
    tracks where ``can_merge_tracks`` allows it), missing rows are inserted
    and the MBID / PUID / meta / foreign ID links are stored.

    :param conn: connection providing ``begin()`` and ``execute()``
    :param submission: mapping describing the submission
    :param index: passed to ``FingerprintSearcher``
    :return: dict with the keys ``id``, ``track_id``, ``fingerprint``,
        ``length``, ``bitrate`` and ``format_id``, or ``None`` when the
        submission was skipped
    """
    with conn.begin():
        update_stmt = schema.submission.update().where(
            schema.submission.c.id == submission['id'])
        conn.execute(update_stmt.values(handled=True))
        mbids = []
        if submission['mbid']:
            mbids.append(resolve_mbid_redirect(conn, submission['mbid']))
        logger.info("Importing submission %d with MBIDs %s", submission['id'],
                    ', '.join(mbids))
        num_unique_items = len(set(submission['fingerprint']))
        if num_unique_items < const.FINGERPRINT_MIN_UNIQUE_ITEMS:
            logger.info("Skipping, has only %d unique items", num_unique_items)
            return
        # Fetch the actual count; the bare result object would always be
        # truthy and the check below could never trigger.
        num_query_items = conn.execute(
            "SELECT icount(acoustid_extract_query(%(fp)s))",
            dict(fp=submission['fingerprint'])).scalar()
        if not num_query_items:
            logger.info("Skipping, no data to index")
            return
        searcher = FingerprintSearcher(conn, index, fast=False)
        searcher.min_score = const.TRACK_MERGE_THRESHOLD
        matches = searcher.search(submission['fingerprint'],
                                  submission['length'])
        fingerprint = {
            'id': None,
            'track_id': None,
            'fingerprint': submission['fingerprint'],
            'length': submission['length'],
            'bitrate': submission['bitrate'],
            'format_id': submission['format_id'],
        }
        if matches:
            all_track_ids = set()
            possible_track_ids = set()
            for m in matches:
                # Look at every track only once, through its first match.
                if m['track_id'] in all_track_ids:
                    continue
                all_track_ids.add(m['track_id'])
                logger.debug("Fingerprint %d with track %d is %d%% similar",
                             m['id'], m['track_id'], m['score'] * 100)
                if can_add_fp_to_track(conn, m['track_id'],
                                       submission['fingerprint'],
                                       submission['length']):
                    possible_track_ids.add(m['track_id'])
                    # The first acceptable track wins; the matched
                    # fingerprint row is reused only above the fingerprint
                    # merge threshold.
                    if not fingerprint['track_id']:
                        fingerprint['track_id'] = m['track_id']
                        if m['score'] > const.FINGERPRINT_MERGE_THRESHOLD:
                            fingerprint['id'] = m['id']
            if len(possible_track_ids) > 1:
                for group in can_merge_tracks(conn, possible_track_ids):
                    if fingerprint['track_id'] in group and len(group) > 1:
                        # Merge the group into its lowest track ID.
                        fingerprint['track_id'] = min(group)
                        group.remove(fingerprint['track_id'])
                        merge_tracks(conn, fingerprint['track_id'],
                                     list(group))
                        break
        if not fingerprint['track_id']:
            fingerprint['track_id'] = insert_track(conn)
        if not fingerprint['id']:
            fingerprint['id'] = insert_fingerprint(conn, fingerprint,
                                                   submission['id'],
                                                   submission['source_id'])
        else:
            inc_fingerprint_submission_count(conn, fingerprint['id'],
                                             submission['id'],
                                             submission['source_id'])
        for mbid in mbids:
            insert_mbid(conn, fingerprint['track_id'], mbid, submission['id'],
                        submission['source_id'])
        # The all-zero UUID is treated as "no PUID".
        if submission['puid'] and submission[
                'puid'] != '00000000-0000-0000-0000-000000000000':
            insert_puid(conn, fingerprint['track_id'], submission['puid'],
                        submission['id'], submission['source_id'])
        if submission['meta_id']:
            insert_track_meta(conn, fingerprint['track_id'],
                              submission['meta_id'], submission['id'],
                              submission['source_id'])
        if submission['foreignid_id']:
            insert_track_foreignid(conn, fingerprint['track_id'],
                                   submission['foreignid_id'],
                                   submission['id'], submission['source_id'])
        return fingerprint