Python find_best_match примеры использования

Язык программирования: Python

Пространство имен/Пакет: cl.lib.string_diff

Метод/Функция: find_best_match

Примеров на hotexamples.com: 3

Python find_best_match — найдено 3 примера. Это лучшие примеры кода на Python для cl.lib.string_diff.find_best_match, взятые из проектов с открытым исходным кодом. Вы можете оценивать каждый пример, чтобы помочь нам улучшить качество примеров.

Пример #1

Показать файл

Файл: tasks.py Проект: weiplanet/courtlistener

def do_heuristic_match(idb_row, ds):
    """Pick the docket whose case name most closely resembles an IDB row.

    Every candidate's case name is harmonized. A name that splits into
    exactly two sides around " v. " is rebuilt from the lowercased first 30
    characters of each side; any other name is used as harmonized. The
    resulting names are handed to ``find_best_match`` (case-insensitively)
    together with "<plaintiff> v. <defendant>" built from the IDB row.

    :param idb_row: The FJC IDB row to match against; its ``plaintiff`` and
        ``defendant`` attributes are read.
    :param ds: An indexable collection of Dockets that might match
    :returns: The best-matching Docket in ds when the reported ratio is above
        0.65, else None
    """
    candidate_names = []
    for docket in ds:
        harmonized = harmonize(docket.case_name)
        sides = harmonized.lower().split(" v. ")
        if len(sides) == 2:
            # Exactly one " v. " separator: cap each party at 30 characters.
            # NOTE(review): presumably mirrors a length limit on the IDB
            # party fields -- confirm.
            left, right = sides
            candidate_names.append("%s v. %s" % (left[:30], right[:30]))
        else:
            # No separator, or more than one: keep the harmonized name whole.
            candidate_names.append(harmonized)

    target_name = harmonize(
        "%s v. %s" % (idb_row.plaintiff, idb_row.defendant)
    )
    best = find_best_match(candidate_names, target_name, case_sensitive=False)

    if best["ratio"] > 0.65:
        logger.info(
            "Found good match by case name for %s: %s",
            target_name,
            best["match_str"],
        )
        # match_index is a position in candidate_names, which was built in
        # the same order as ds, so it addresses the matching docket.
        return ds[best["match_index"]]

    logger.info(
        "No good match after office and case name filtering. Creating "
        "new item: %s",
        idb_row,
    )
    return None

Пример #2

Показать файл

    def do_second_pass(options):
        """Resolve the IDB rows that the first pass left without a docket.

        The first pass ignored duplicates, letting them stack up for later
        analysis. This pass walks every CV_2017 IDB row that still has no
        docket (ordered by pk) and looks up candidate dockets by core docket
        number, court and office prefix, leaving out dockets whose number
        contains 'cr' or whose case name contains "sealed", 'suppressed' or
        'search warrant'. Then:

        - no candidates: a new docket is created from the IDB row;
        - exactly one candidate: it is merged with the IDB row;
        - several candidates: case names are compared via find_best_match and
          the best one is merged if its ratio is above 0.65; otherwise a new
          docket is created.

        :param options: Mapping with an 'offset' key (rows whose position is
            below it are skipped) and a 'limit' key (processing stops once
            the position reaches it; a value of 0 or less disables the limit).
        """
        pending_rows = FjcIntegratedDatabase.objects.filter(
            dataset_source=CV_2017,
            docket__isnull=True,
        ).order_by('pk')

        for row_num, idb_row in enumerate(queryset_generator(pending_rows)):
            # Apply the offset/limit window before doing any docket lookups.
            if row_num < options['offset']:
                continue
            if 0 < options['limit'] <= row_num:
                break

            # Candidate dockets: same core number, court and office prefix.
            same_case_dockets = Docket.objects.filter(
                docket_number_core=idb_row.docket_number,
                court=idb_row.district,
                docket_number__startswith='%s:' % idb_row.office,
            )
            candidates = (
                same_case_dockets
                .exclude(docket_number__icontains='cr')
                .exclude(case_name__icontains="sealed")
                .exclude(case_name__icontains='suppressed')
                .exclude(case_name__icontains='search warrant')
            )
            candidate_count = candidates.count()

            if candidate_count == 0:
                logger.info("%s: Creating new docket for IDB row: %s",
                            row_num, idb_row)
                create_new_docket_from_idb(idb_row.pk)
                continue

            if candidate_count == 1:
                only_docket = candidates[0]
                logger.info("%s: Merging Docket %s with IDB row: %s",
                            row_num, only_docket, idb_row)
                merge_docket_with_idb(only_docket.pk, idb_row.pk)
                continue

            logger.info(
                "%s: Still have %s results after office and civil "
                "docket number filtering. Filtering further.",
                row_num, candidate_count)

            # Build one comparable name per candidate, in candidate order.
            candidate_names = []
            for docket in candidates:
                harmonized = harmonize(docket.case_name)
                sides = harmonized.lower().split(' v. ')
                if len(sides) != 2:
                    # No separator, or more than one: use the name whole.
                    candidate_names.append(harmonized)
                    continue
                # Exactly one separator: cap each party at 30 characters.
                left, right = sides
                candidate_names.append('%s v. %s' % (left[:30], right[:30]))

            target_name = harmonize(
                '%s v. %s' % (idb_row.plaintiff, idb_row.defendant))
            best = find_best_match(
                candidate_names, target_name, case_sensitive=False)

            if best['ratio'] > 0.65:
                logger.info("%s Found good match by case name for %s: %s",
                            row_num, target_name, best['match_str'])
                # match_index is a position in candidate_names, which follows
                # the iteration order of candidates.
                matched = candidates[best['match_index']]
                merge_docket_with_idb(matched.pk, idb_row.pk)
                continue

            logger.info(
                "%s No good match after office and case name "
                "filtering. Creating new item: %s", row_num, idb_row)
            create_new_docket_from_idb(idb_row.pk)

Пример #3

Показать файл

Файл: merge_idb_into_dockets.py Проект: freelawproject/courtlistener

    def do_second_pass(options):
        """Merge or create dockets for IDB rows the first pass left unmatched.

        In the first pass duplicates were ignored and left to stack up for
        later analysis. Here each CV_2017 IDB row without a docket is visited
        in pk order. Candidate dockets share the row's core docket number,
        court and office prefix, and exclude dockets whose number contains
        'cr' or whose case name contains "sealed", 'suppressed' or
        'search warrant'. Zero candidates means a new docket is created, one
        candidate is merged directly, and several candidates are ranked by
        case name with find_best_match: the best is merged when its ratio is
        above 0.65, otherwise a new docket is created.

        :param options: Mapping providing 'offset' (rows positioned before it
            are skipped) and 'limit' (iteration stops once the position
            reaches it; 0 or a negative value means no limit).
        """
        def comparable_name(docket):
            """Return the docket's case name in the form used for matching."""
            harmonized = harmonize(docket.case_name)
            sides = harmonized.lower().split(' v. ')
            if len(sides) != 2:
                # No ' v. ' separator, or several: use the name whole.
                return harmonized
            # Exactly one separator: cap each party at 30 characters.
            return '%s v. %s' % (sides[0][:30], sides[1][:30])

        # Lookups applied, in this order, as successive .exclude() calls.
        excluded_lookups = (
            ('docket_number__icontains', 'cr'),
            ('case_name__icontains', "sealed"),
            ('case_name__icontains', 'suppressed'),
            ('case_name__icontains', 'search warrant'),
        )

        unmatched = FjcIntegratedDatabase.objects.filter(
            dataset_source=CV_2017,
            docket__isnull=True,
        ).order_by('pk')

        for idx, idb_row in enumerate(queryset_generator(unmatched)):
            # Skip rows before the offset; stop at a positive limit.
            if idx < options['offset']:
                continue
            if 0 < options['limit'] <= idx:
                break

            matches = Docket.objects.filter(
                docket_number_core=idb_row.docket_number,
                court=idb_row.district,
                docket_number__startswith='%s:' % idb_row.office
            )
            for lookup, term in excluded_lookups:
                matches = matches.exclude(**{lookup: term})
            n_matches = matches.count()

            if n_matches == 0:
                logger.info("%s: Creating new docket for IDB row: %s",
                            idx, idb_row)
                create_new_docket_from_idb(idb_row.pk)
                continue

            if n_matches == 1:
                sole_match = matches[0]
                logger.info("%s: Merging Docket %s with IDB row: %s",
                            idx, sole_match, idb_row)
                merge_docket_with_idb(sole_match.pk, idb_row.pk)
                continue

            logger.info("%s: Still have %s results after office and civil "
                        "docket number filtering. Filtering further.",
                        idx, n_matches)

            # One comparable name per candidate, in candidate order, so the
            # index reported by find_best_match addresses `matches` directly.
            names = [comparable_name(docket) for docket in matches]
            wanted = harmonize(
                '%s v. %s' % (idb_row.plaintiff, idb_row.defendant)
            )
            outcome = find_best_match(names, wanted, case_sensitive=False)

            if outcome['ratio'] > 0.65:
                logger.info("%s Found good match by case name for %s: %s",
                            idx, wanted, outcome['match_str'])
                chosen = matches[outcome['match_index']]
                merge_docket_with_idb(chosen.pk, idb_row.pk)
                continue

            logger.info("%s No good match after office and case name "
                        "filtering. Creating new item: %s", idx, idb_row)
            create_new_docket_from_idb(idb_row.pk)