Пример #1
0
def _search_company(request, country_obj=None):
    """Run the company search for the request and return it paginated.

    The search text is read from the ``q`` GET parameter. A non-empty text is
    first matched with ``operator="and"`` across all searchable fields; when
    that yields no hits, the search is retried with ``operator="or"`` and
    ``minimum_should_match="2"``. An empty text matches every company.

    :param request: request whose GET parameters drive the search; it is also
        handed to ``paginated_search``.
    :param country_obj: optional country; when given, hits are additionally
        required to match its ``name_uk`` in
        ``related_countries.to_country_uk``.
    :returns: whatever ``paginated_search`` returns for the final search.
    """
    needle = request.GET.get("q", "")

    searchable = [
        "name_uk",
        "short_name_uk",
        "name_en",
        "short_name_en",
        "related_persons.person_uk",
        "related_persons.person_en",
        "other_founders",
        "other_recipient",
        "other_owners",
        "other_managers",
        "bank_name",
        "edrpou",
        "code_chunks",
        "related_countries.to_country_uk",
        "related_countries.to_country_en",
    ]

    if not needle:
        found = ElasticCompany.search().query("match_all")
    else:
        found = ElasticCompany.search().query(
            "multi_match",
            query=needle,
            operator="and",
            fields=searchable,
        )
        if not found.count():
            # Strict matching found nothing: fall back to a looser query.
            found = ElasticCompany.search().query(
                "multi_match",
                query=needle,
                operator="or",
                minimum_should_match="2",
                fields=searchable,
            )

    if country_obj is not None:
        country_clause = {"query": country_obj.name_uk, "operator": "and"}
        found = found.query(
            "match", related_countries__to_country_uk=country_clause)

    # Highlighting is used to find which exact related person caused the
    # match, so that it can be shown in the person's card at the top of the
    # list. Check Person.relevant_related_persons method for details.
    for person_field in ("related_persons.person_uk",
                         "related_persons.person_en"):
        found = found.highlight(
            person_field, order="score", pre_tags=[""], post_tags=[""])

    return paginated_search(request, found)
Пример #2
0
    def handle(self, *args, **options):
        """Index every published person and company into Elasticsearch.

        For each of the two document types the published database rows are
        converted to documents first, then (only when
        ``options["drop_indices"]`` is truthy) the index is deleted,
        re-initialised and given its ``index.max_result_window`` setting, and
        finally the documents are written with ``self.bulk_write``.

        With ``drop_indices`` the cached ``get_all_persons`` /
        ``get_all_companies`` results are also invalidated and rebuilt. The
        index is refreshed explicitly beforehand so the rebuilt cache sees the
        documents that were just written.
        """
        activate(settings.LANGUAGE_CODE)
        conn = connections.get_connection('default')
        drop_indices = options["drop_indices"]
        # Use the project-wide setting when it exists; otherwise keep the
        # value that used to be hard-coded here.
        max_result_window = getattr(settings, "ES_MAX_RESULT_WINDOW", 100000)

        person_qs = Person.objects.filter(publish=True)
        docs_to_index = [
            ElasticPerson(**p.to_dict())
            for p in tqdm(person_qs.nocache().iterator(), total=person_qs.count())
        ]

        if drop_indices:
            Index(ElasticPerson._doc_type.index).delete(ignore=404)
            ElasticPerson.init()

            conn.indices.put_settings(
                index=ElasticPerson._doc_type.index,
                body={
                    'index.max_result_window': max_result_window
                }
            )

        self.bulk_write(conn, docs_to_index)

        if drop_indices:
            # Freshly written documents only become searchable after a
            # refresh; force one so the cache below is not built from a
            # partially visible index.
            conn.indices.refresh(index=ElasticPerson._doc_type.index)
            # Invalidate old values and immediately cache again.
            ElasticPerson.get_all_persons.invalidate(ElasticPerson)
            ElasticPerson.get_all_persons()

        self.stdout.write(
            'Loaded {} persons to persistence storage'.format(
                len(docs_to_index)))

        company_qs = Company.objects.filter(publish=True)
        docs_to_index = [
            ElasticCompany(**p.to_dict())
            for p in tqdm(company_qs.nocache().iterator(), total=company_qs.count())
        ]

        if drop_indices:
            Index(ElasticCompany._doc_type.index).delete(ignore=404)
            ElasticCompany.init()
            conn.indices.put_settings(
                index=ElasticCompany._doc_type.index,
                body={
                    'index.max_result_window': max_result_window
                }
            )

        self.bulk_write(conn, docs_to_index)

        if drop_indices:
            # Same reasoning as for persons: refresh before re-caching.
            conn.indices.refresh(index=ElasticCompany._doc_type.index)
            # Invalidate old values and immediately cache again.
            ElasticCompany.get_all_companies.invalidate(ElasticCompany)
            ElasticCompany.get_all_companies()

        self.stdout.write(
            'Loaded {} companies to persistence storage'.format(
                len(docs_to_index)))
Пример #3
0
def export_companies(request, fmt):
    """Serve the full companies dataset as a downloadable attachment.

    :param request: current request; its user must hold the
        ``core.export_companies`` permission.
    :param fmt: export format, either ``"json"`` or ``"xml"``.
    :returns: ``HttpResponseForbidden`` when the permission is missing,
        otherwise a response carrying the dataset with ``Content-Disposition``
        and ``Content-Length`` headers set.
    :raises Http404: when ``fmt`` is not one of the supported formats.
    """
    if not request.user.has_perm("core.export_companies"):
        return HttpResponseForbidden()

    if fmt not in ("json", "xml"):
        # Previously an unknown format fell through both branches below and
        # crashed on the unbound ``response`` after the download had already
        # been logged. Reject it before doing any work.
        from django.http import Http404

        raise Http404("Unsupported export format")

    # Materialise as a list: a lazy ``map`` object cannot be serialised by
    # JsonResponse on Python 3.
    data = [
        blacklist(
            add_encrypted_url(company, request.user,
                              "encrypted_company_redirect"),
            ["id"])
        for company in ElasticCompany.get_all_companies()
    ]

    ActionLog(user=request.user,
              action="download_companies_dataset",
              details=fmt).save()

    if fmt == "json":
        response = JsonResponse(data, safe=False)
    else:
        response = render(request,
                          "xml.jinja", {"data": data},
                          content_type="application/xhtml+xml")

    response[
        "Content-Disposition"] = "attachment; filename=companies_{:%Y%m%d_%H%M}.{}".format(
            datetime.now(), fmt)

    response["Content-Length"] = len(response.content)

    return response
Пример #4
0
def countries(request, sources=("persons", "companies"), country_id=None):
    """Render the countries page, optionally narrowed to a single country.

    :param request: current request, also passed to ``paginated_search``.
    :param sources: which result sets to include; only ``"persons"`` and
        ``"companies"`` are recognised.
    :param country_id: ``iso2`` code of the country to filter by, or ``None``
        to list everything.
    :returns: the rendered ``countries.jinja`` template. ``persons`` and
        ``companies`` are present in the context only for requested sources.
    :raises Http404: when no country has the given ``iso2`` code, or when the
        requested page is empty or not an integer.
    """
    country = None
    if country_id is not None:
        country = get_object_or_404(Country, iso2=country_id)

    # Countries referenced by at least one person or company, most used first.
    used_countries = (Country.objects.annotate(
        persons_count=Count("person2country", distinct=True),
        companies_count=Count("company2country", distinct=True),
    ).annotate(usages=F("persons_count") + F("companies_count")).exclude(
        usages=0).exclude(iso2="").order_by("-usages"))

    params = {"used_countries": used_countries, "country": country}

    def _for_country(doc_cls):
        # Search over doc_cls, restricted to the selected country if any.
        if country is None:
            return doc_cls.search().query("match_all")

        return doc_cls.search().query(
            "match",
            related_countries__to_country_uk={
                "query": country.name_uk,
                "operator": "and"
            })

    try:
        # Build and paginate a result set only when its source was requested.
        # Previously both were paginated unconditionally, so leaving a source
        # out of `sources` hit an unbound local variable.
        if "persons" in sources:
            params["persons"] = paginated_search(
                request, _for_country(ElasticPerson))

        if "companies" in sources:
            params["companies"] = paginated_search(
                request, _for_country(ElasticCompany))
    except EmptyPage:
        raise Http404("Page is empty")
    except PageNotAnInteger:
        raise Http404("No page")

    return render(request, "countries.jinja", params)
Пример #5
0
def search(request, sources=("persons", "companies")):
    """Render search results for persons and/or companies.

    The search text comes from the ``q`` GET parameter. When ``is_exact`` is
    ``"on"``, an exact (``operator="and"``) lookup is tried first: if exactly
    one person matches, the user is redirected to that person's page;
    otherwise, if exactly one company matches, to that company's page.
    In every other case the regular result page is rendered.

    :param request: current request.
    :param sources: which result sets to include in the page.
    :returns: a redirect for a unique exact match, else the rendered
        ``search.jinja`` template.
    :raises Http404: when the requested page is empty or not an integer.
    """
    text = request.GET.get("q", "")
    exact_requested = request.GET.get("is_exact", "") == "on"

    context = {"query": text, "sources": sources, "today": datetime.now()}

    if exact_requested:
        person_fields = [
            "full_name",
            "names",
            "full_name_en",
            "also_known_as_uk",
            "also_known_as_en",
        ]
        person_hits = ElasticPerson.search().query(
            "multi_match", query=text, operator="and", fields=person_fields)

        # A single exact person match: jump straight to the profile.
        if person_hits.count() == 1:
            only_person = person_hits.execute()[0]
            target = reverse(
                "person_details", kwargs={"person_id": only_person.id})
            return redirect(target)

        company_fields = ["short_name_en", "short_name_uk", "name_en", "name_uk"]
        company_hits = ElasticCompany.search().query(
            "multi_match", query=text, operator="and", fields=company_fields)

        # A single exact company match: jump straight to the company page.
        if company_hits.count() == 1:
            only_company = company_hits.execute()[0]
            target = reverse(
                "company_details", kwargs={"company_id": only_company.id})
            return redirect(target)

    try:
        if "persons" in sources:
            found_persons = _search_person(request)
            context["persons"] = found_persons

            # Nothing found: offer a suggestion instead.
            if not found_persons:
                context["suggested_person"] = _suggest_person(request)

        if "companies" in sources:
            context["companies"] = _search_company(request)
    except EmptyPage:
        raise Http404("Page is empty")
    except PageNotAnInteger:
        raise Http404("No page")

    return render(request, "search.jinja", context)
Пример #6
0
def countries(request, sources=("persons", "companies"), country_id=None):
    """Render the countries page, optionally narrowed to a single country.

    :param request: current request, also passed to ``paginated_search``.
    :param sources: which result sets to include; only ``"persons"`` and
        ``"companies"`` are recognised.
    :param country_id: ``iso2`` code of the country to filter by, or ``None``
        to list everything.
    :returns: the rendered ``countries.jinja`` template. ``persons`` and
        ``companies`` are present in the context only for requested sources.
    :raises Http404: when no country has the given ``iso2`` code, or when the
        requested page is empty or not an integer.
    """
    country = None
    if country_id is not None:
        country = get_object_or_404(Country, iso2=country_id)

    params = {
        "country": country,
        "today": now(),
        "query": "",
        "include_related_persons": False,
    }

    def _for_country(doc_cls):
        # Search over doc_cls, restricted to the selected country if any.
        if country is None:
            return doc_cls.search().query("match_all")

        return doc_cls.search().query(
            "match",
            related_countries__to_country_uk={
                "query": country.name_uk,
                "operator": "and"
            })

    try:
        # Build and paginate a result set only when its source was requested.
        # Previously both were paginated unconditionally, so leaving a source
        # out of `sources` hit an unbound local variable.
        if "persons" in sources:
            params["persons"] = paginated_search(
                request, _for_country(ElasticPerson))

        if "companies" in sources:
            params["companies"] = paginated_search(
                request, _for_country(ElasticCompany))
    except EmptyPage:
        raise Http404("Page is empty")
    except PageNotAnInteger:
        raise Http404("No page")

    return render(request, "countries.jinja", params)
Пример #7
0
    def assume(q, fuzziness):
        """Collect fuzzy completion suggestions for ``q`` from both indices.

        Asks the person index for up to 10 completions on
        ``full_name_suggest`` and the company index for up to 5 on
        ``name_suggest``, then merges the options by descending ``_score``.

        ``field`` and ``company_field`` are taken from the enclosing scope;
        they name the ``_source`` attribute to display for a person and a
        company option respectively.

        :param q: text to complete.
        :param fuzziness: value passed as the completion ``fuzziness``.
        :returns: ``unique(...)`` over the display values in score order, or
            an empty list when neither index produced an option.
        """

        def _options(response):
            # Return the "name" suggestion options of a response, or nothing
            # when the response carries no usable suggestion section.
            # NOTE(review): `success` is read as an attribute, not called;
            # confirm it is a property in the elasticsearch_dsl version in
            # use, since a bound method would always be truthy.
            if response.success and hasattr(response, "suggest"):
                return response.suggest["name"][0]["options"]
            return []

        results = []

        search = (
            ElasticPerson.search()
            .source(["full_name_suggest", field])
            .params(size=0)
            .suggest(
                "name",
                q,
                completion={
                    "field": "full_name_suggest",
                    "size": 10,
                    "fuzzy": {
                        "fuzziness": fuzziness,
                        "unicode_aware": True
                    },
                },
            )
        )

        # The person response now gets the same `suggest` guard that the
        # company response already had.
        results.extend(_options(search.execute()))

        search = (
            ElasticCompany.search()
            .source(["name_suggest", company_field])
            .params(size=0)
            .suggest(
                "name",
                q,
                completion={
                    "field": "name_suggest",
                    "size": 5,
                    "fuzzy": {
                        "fuzziness": fuzziness,
                        "unicode_aware": True
                    },
                },
            )
        )

        # TODO: Investigate, completion doesn't work with numbers

        results.extend(_options(search.execute()))

        results = sorted(results, key=itemgetter("_score"), reverse=True)

        if not results:
            return []

        # Prefer the company display value; fall back to the person one.
        return unique(
            getattr(val._source, company_field, "")
            or getattr(val._source, field, "") for val in results)
Пример #8
0
    def handle(self, *args, **options):
        """Index every published person and company into Elasticsearch.

        Persons are processed first, then companies. For each, the published
        rows are converted to documents, the index is recreated when
        ``options["drop_indices"]`` is truthy, and the documents are written
        with ``self.bulk_write``.

        With ``drop_indices`` the command finally waits 60 seconds, rebuilds
        the cached ``get_all_persons`` / ``get_all_companies`` results, and
        reports on stderr any difference between the number of documents
        prepared from the database and the number read back.
        """
        activate(settings.LANGUAGE_CODE)
        es = connections.get_connection("default")

        # ---- persons -------------------------------------------------------
        published_persons = Person.objects.filter(publish=True)
        batch = [
            ElasticPerson(**row.to_dict())
            for row in tqdm(
                published_persons.nocache().iterator(),
                total=published_persons.count(),
            )
        ]
        persons_total = len(batch)

        recreate = options["drop_indices"]
        if recreate:
            person_idx.delete(ignore=404)
            person_idx.create()

            ElasticPerson.init()

            window = {"index.max_result_window": settings.ES_MAX_RESULT_WINDOW}
            es.indices.put_settings(
                index=ElasticPerson._doc_type.index, body=window)

        self.bulk_write(es, batch)

        loaded_msg = "Loaded {} persons to persistence storage".format(len(batch))
        self.stdout.write(loaded_msg)

        # ---- companies -----------------------------------------------------
        published_companies = Company.objects.filter(publish=True)
        batch = [
            ElasticCompany(**row.to_dict())
            for row in tqdm(
                published_companies.nocache().iterator(),
                total=published_companies.count(),
            )
        ]
        companies_total = len(batch)

        if recreate:
            company_idx.delete(ignore=404)
            company_idx.create()

            ElasticCompany.init()

            window = {"index.max_result_window": settings.ES_MAX_RESULT_WINDOW}
            es.indices.put_settings(
                index=ElasticCompany._doc_type.index, body=window)

        self.bulk_write(es, batch)

        loaded_msg = "Loaded {} companies to persistence storage".format(len(batch))
        self.stdout.write(loaded_msg)

        if not recreate:
            return

        # Presumably gives Elasticsearch time to make the new documents
        # searchable before the caches are rebuilt (TODO confirm).
        sleep(60)

        # Drop the stale cached list and immediately cache a fresh one.
        ElasticPerson.get_all_persons.invalidate(ElasticPerson)
        indexed_persons_total = len(ElasticPerson.get_all_persons())

        # Same for companies.
        ElasticCompany.get_all_companies.invalidate(ElasticCompany)
        indexed_companies_total = len(ElasticCompany.get_all_companies())

        sanity_checks = (
            (
                "Mismatch between persons in DB ({}) and indexed persons ({})",
                persons_total,
                indexed_persons_total,
            ),
            (
                "Mismatch between companies in DB ({}) and indexed companies ({})",
                companies_total,
                indexed_companies_total,
            ),
        )
        for template, prepared, indexed in sanity_checks:
            if prepared != indexed:
                self.stderr.write(template.format(prepared, indexed))
Пример #9
0
def search(request, sources=("persons", "companies")):
    """Render search results for persons and/or companies.

    GET parameters read: ``q`` (search text), ``country`` (``iso2`` code used
    to narrow results; an unknown code results in no country filter) and
    ``is_exact``. When ``is_exact`` is ``"on"``, an exact (``operator="and"``)
    lookup is tried first: if exactly one person matches, the user is
    redirected to that person's page; otherwise, if exactly one company
    matches, to that company's page. In every other case the regular result
    page is rendered.

    :param request: current request.
    :param sources: which result sets to include in the page.
    :returns: a redirect for a unique exact match, else the rendered
        ``search.jinja`` template.
    :raises Http404: when the requested page is empty or not an integer.
    """
    text = request.GET.get("q", "")
    country_code = request.GET.get("country", "")

    country_obj = None
    if country_code:
        country_obj = Country.objects.filter(iso2=country_code).first()

    exact_requested = request.GET.get("is_exact", "") == "on"

    context = {
        "query": text,
        "sources": sources,
        "today": now(),
        "country_obj": country_obj,
        "include_related_persons": True,
    }

    def _within_country(hits):
        # Narrow `hits` to the selected country; no-op without a country.
        if country_obj is None:
            return hits

        return hits.query(
            "match",
            related_countries__to_country_uk={
                "query": country_obj.name_uk,
                "operator": "and",
            },
        )

    if exact_requested:
        person_hits = _within_country(
            ElasticPerson.search().query(
                "multi_match",
                query=text,
                operator="and",
                fields=[
                    "full_name",
                    "names",
                    "full_name_en",
                    "also_known_as_uk",
                    "also_known_as_en",
                ],
            )
        )

        # A single exact person match: jump straight to the profile.
        if person_hits.count() == 1:
            only_person = person_hits.execute()[0]
            return redirect(
                reverse("person_details",
                        kwargs={"person_id": only_person.id}))

        company_hits = _within_country(
            ElasticCompany.search().query(
                "multi_match",
                query=text,
                operator="and",
                fields=["short_name_en", "short_name_uk", "name_en", "name_uk"],
            )
        )

        # A single exact company match: jump straight to the company page.
        if company_hits.count() == 1:
            only_company = company_hits.execute()[0]
            return redirect(
                reverse("company_details",
                        kwargs={"company_id": only_company.id}))

    try:
        if "persons" in sources:
            found_persons = _search_person(request, country_obj)
            context["persons"] = found_persons

            # Nothing found: offer a suggestion instead.
            if not found_persons:
                context["suggested_person"] = _suggest_person(request)

        if "companies" in sources:
            context["companies"] = _search_company(request, country_obj)
    except EmptyPage:
        raise Http404("Page is empty")
    except PageNotAnInteger:
        raise Http404("No page")

    return render(request, "search.jinja", context)