示例#1
0
def _create_distance_signature(signatures_map, uuid):
    """Build the signature used for a similarity measurement.

    Looks up the signature stored under ``uuid`` in ``signatures_map``,
    copies it and attaches the Beard record of its publication, so the
    result can be compared against claimed signatures.

    :param signatures_map:
        A mapping from signature UUID to the signature itself. Each
        signature must provide ``get`` and ``copy`` (e.g. a dictionary).

    :param uuid:
        A string representing UUID of a given signature.

        Example:
            uuid = 'd63537a8-1df4-4436-b5ed-224da5b5028c'

    :return:
        A copy of the stored signature with an extra ``publication`` key
        holding whatever ``create_beard_record`` returned for the
        signature's ``publication_id``. The stored signature itself is
        not modified.

        Example (``publication`` key omitted for brevity):
            {u'affiliations': u'Yerevan Phys. Inst.',
             u'publication_id': u'13c3cca8-b0bf-42f5-90d4-e3dfcced0511',
             u'full_name': u'Chatrchyan, Serguei',
             u'uuid': u'd63537a8-1df4-4436-b5ed-224da5b5028c'}

    :raises KeyError:
        If ``uuid`` is not present in ``signatures_map`` (when it is a
        plain dictionary).
    """
    stored_signature = signatures_map[uuid]

    # A missing 'publication_id' is forwarded as None rather than raising.
    publication_id = stored_signature.get('publication_id')
    publication = create_beard_record(publication_id)

    # Work on a copy so the entry kept in the map stays untouched.
    enriched = stored_signature.copy()
    enriched['publication'] = publication
    return enriched
示例#2
0
def _create_distance_signature(signatures_map, uuid):
    """Prepare a signature for similarity comparison.

    The signature registered under ``uuid`` is copied and extended with
    the Beard record of the publication it belongs to. The outcome is
    meant to be measured against claimed signatures.

    :param signatures_map:
        A mapping keyed by signature UUID whose values are the
        signatures. Values must support ``get`` and ``copy``
        (e.g. dictionaries).

    :param uuid:
        A string representing UUID of a given signature.

        Example:
            uuid = 'd63537a8-1df4-4436-b5ed-224da5b5028c'

    :return:
        A copy of the signature from the map, plus a ``publication``
        entry containing the value returned by ``create_beard_record``
        for the signature's ``publication_id``.

        Example (without the added ``publication`` entry):
            {u'affiliations': u'Yerevan Phys. Inst.',
             u'publication_id': u'13c3cca8-b0bf-42f5-90d4-e3dfcced0511',
             u'full_name': u'Chatrchyan, Serguei',
             u'uuid': u'd63537a8-1df4-4436-b5ed-224da5b5028c'}

    :raises KeyError:
        If ``signatures_map`` is a plain dictionary and has no entry
        for ``uuid``.
    """
    source = signatures_map[uuid]

    # ``get`` yields None when the signature carries no 'publication_id'.
    record = create_beard_record(source.get('publication_id'))

    # Copy first, then decorate: the map's own entry is left as it was.
    result = source.copy()
    result['publication'] = record
    return result
示例#3
0
def disambiguation_clustering(phonetic_block):
    """Cluster the signatures of a single phonetic block.

    The method receives a phonetic block as an argument. It builds two
    lists, the Beard records of every publication returned by
    ``get_records_from_block`` and the Beard signatures of the block,
    submits them to ``make_beard_clusters`` and hands every resulting
    cluster to ``process_clusters``. The database session is committed
    on success and always closed afterwards.

    :param phonetic_block:
        The phonetic block whose signatures should be clustered.

    :return:
        None. The effects are the calls to ``process_clusters`` and the
        commit on ``db.session``.
    """
    try:
        # Pass the block as a logger argument instead of pre-formatting:
        # formatting is deferred and a tuple value cannot break '%'.
        logger.info("Clustering: %s", phonetic_block)

        records = []
        signatures = []

        # Get all the records containing specific phonetic block.
        records_ids = get_records_from_block(phonetic_block)

        # Create records and signatures in Beard readable format.
        for record_id in records_ids:
            records.append(create_beard_record(record_id))
            signatures.extend(
                create_beard_signatures(record_id, phonetic_block))

        # Dispatch clustering job to Beard Celery service.
        try:
            clusters_matched, clusters_created = make_beard_clusters(
                records, signatures).get()
        except AttributeError:
            # Deliberate best effort kept from the original code.
            # NOTE(review): presumably covers ``make_beard_clusters``
            # returning an object without ``get`` (e.g. None) - confirm.
            clusters_matched = {}
            clusters_created = {}

        # Update recids of signatures to existing profiles.
        # ``items()`` exists on both Python 2 and 3, unlike ``iteritems()``.
        if clusters_matched:
            for profile_recid, beard_uuids in clusters_matched.items():
                process_clusters(beard_uuids, signatures, profile_recid)

        # Create new profiles.
        if clusters_created:
            for beard_uuids in clusters_created.values():
                process_clusters(beard_uuids, signatures)

        db.session.commit()
    finally:
        # Release the session whether or not clustering succeeded.
        db.session.close()
示例#4
0
def disambiguation_clustering(phonetic_block):
    """Cluster the signatures of a single phonetic block.

    The method receives a phonetic block as an argument. It builds two
    lists, the Beard records of every publication returned by
    ``get_records_from_block`` and the Beard signatures of the block,
    submits them to ``make_beard_clusters`` and hands every resulting
    cluster to ``process_clusters``. The database session is committed
    on success and always closed afterwards.

    :param phonetic_block:
        The phonetic block whose signatures should be clustered.

    :return:
        None. The effects are the calls to ``process_clusters`` and the
        commit on ``db.session``.
    """
    try:
        # Let the logger do the interpolation: it is lazy, and a tuple
        # value no longer gets unpacked by the '%' operator.
        logger.info("Clustering: %s", phonetic_block)

        records = []
        signatures = []

        # Get all the records containing specific phonetic block.
        records_ids = get_records_from_block(phonetic_block)

        # Create records and signatures in Beard readable format.
        for record_id in records_ids:
            records.append(create_beard_record(record_id))
            signatures.extend(
                create_beard_signatures(record_id, phonetic_block))

        # Dispatch clustering job to Beard Celery service.
        try:
            clusters_matched, clusters_created = make_beard_clusters(
                records, signatures).get()
        except AttributeError:
            # Best-effort fallback preserved from the original code.
            # NOTE(review): looks like it guards against a result object
            # lacking ``get`` (e.g. None) - verify against the callee.
            clusters_matched = {}
            clusters_created = {}

        # Update recids of signatures to existing profiles.
        # ``iteritems()`` is Python 2 only; ``items()`` works everywhere.
        if clusters_matched:
            for profile_recid, beard_uuids in clusters_matched.items():
                process_clusters(beard_uuids, signatures, profile_recid)

        # Create new profiles.
        if clusters_created:
            for beard_uuids in clusters_created.values():
                process_clusters(beard_uuids, signatures)

        db.session.commit()
    finally:
        # Always close the session, even when an exception propagates.
        db.session.close()