def handle(self, *args, **options):
    """Re-normalise and re-save every declaration returned by a match_all scan.

    Runs ``self.apply_migrations()`` first. For each declaration the
    apostrophes in the four name fields are replaced, and the
    ``full_name_suggest``, ``ft_src``, ``full_name_for_sorting`` and
    ``index_card`` fields are recomputed before the document is saved.
    """
    self.apply_migrations()

    for decl in Declaration.search().query('match_all').scan():
        progress_line = 'Processing decl for {}\n'.format(
            decl.general.full_name)
        sys.stdout.write(progress_line)
        sys.stdout.flush()

        # Normalise in a fixed order: full name first, then its parts.
        for field in ('full_name', 'name', 'last_name', 'patronymic'):
            setattr(decl.general, field,
                    replace_apostrophes(getattr(decl.general, field)))

        name_patronymic_last = ' '.join([
            decl.general.name,
            decl.general.patronymic,
            decl.general.last_name,
        ])
        name_last = ' '.join([decl.general.name, decl.general.last_name])
        decl.general.full_name_suggest = {
            'input': [
                decl.general.full_name,
                name_patronymic_last,
                name_last,
            ],
        }

        interesting_terms = filter_only_interesting(decl.to_dict())
        decl.ft_src = "\n".join(interesting_terms)

        decl.general.full_name_for_sorting = keyword_for_sorting(
            decl.general.full_name)
        # to_dict() is taken again here so it reflects the fields set above.
        decl.index_card = concat_fields(decl.to_dict(),
                                        Declaration.INDEX_CARD_FIELDS)

        decl.save()
示例#2
0
    def handle(self, *args, **options):
        """Recompute derived fields of every declaration, adding translations.

        A ``Translator`` is loaded once up front, migrations are applied, and
        then each declaration from a match_all scan gets its name fields
        normalised and its ``full_name_suggest``, ``ft_src`` (terms plus their
        translations), ``full_name_for_sorting``, ``index_card``, ``persons``
        and ``names_autocomplete`` fields rebuilt before being saved.
        """
        translator = Translator()
        translator.fetch_full_dict_from_db()

        self.apply_migrations()

        for decl in tqdm.tqdm(Declaration.search().query('match_all').scan()):
            # Snapshot taken before the name fields are normalised below.
            decl_dct = decl.to_dict()

            for field in ('full_name', 'name', 'last_name', 'patronymic'):
                setattr(decl.general, field,
                        replace_apostrophes(getattr(decl.general, field)))

            name_patronymic_last = ' '.join([
                decl.general.name,
                decl.general.patronymic,
                decl.general.last_name,
            ])
            name_last = ' '.join([decl.general.name, decl.general.last_name])
            decl.general.full_name_suggest = {
                'input': [
                    decl.general.full_name,
                    name_patronymic_last,
                    name_last,
                ],
            }

            # Blank the old full-text source in the snapshot before filtering.
            decl_dct["ft_src"] = ""
            terms = filter_only_interesting(decl_dct)
            # Build the translations fully before appending them to ``terms``.
            translated = [
                translator.translate(term)["translation"] for term in terms
            ]
            terms += translated
            decl.ft_src = "\n".join(terms)

            decl.general.full_name_for_sorting = keyword_for_sorting(
                decl.general.full_name)
            decl.index_card = concat_fields(decl_dct,
                                            Declaration.INDEX_CARD_FIELDS)

            # (last name, first name, patronymic, relation); the declarant
            # comes first with no relation.
            name_tuples = [(decl.general.last_name, decl.general.name,
                            decl.general.patronymic, None)]
            for relative in decl.general.family:
                last, first, patronymic, _ = parse_fullname(
                    relative.family_name)
                name_tuples.append(
                    (last, first, patronymic, relative.relations))

            persons = set()
            names_autocomplete = set()
            for last, first, patronymic, relation in name_tuples:
                persons |= generate_all_names(
                    last, first, patronymic, relation)
                names_autocomplete |= autocomplete_suggestions(
                    concat_name(last, first, patronymic))

            # Drop falsy entries before storing.
            decl.persons = [person for person in persons if person]
            decl.names_autocomplete = [
                suggestion for suggestion in names_autocomplete if suggestion
            ]

            decl.save()
示例#3
0
    def handle(self, *args, **options):
        """Recompute derived fields of every declaration and save it.

        After ``self.apply_migrations()``, each declaration from a match_all
        scan is reported on stdout, has its name fields normalised, and gets
        ``full_name_suggest``, ``ft_src``, ``full_name_for_sorting``,
        ``index_card``, ``persons`` and ``names_autocomplete`` rebuilt.
        """
        self.apply_migrations()

        for decl in Declaration.search().query('match_all').scan():
            # Snapshot taken before the name fields are normalised below.
            decl_dct = decl.to_dict()

            progress_line = 'Processing decl for {}\n'.format(
                decl.general.full_name)
            sys.stdout.write(progress_line)
            sys.stdout.flush()

            for field in ('full_name', 'name', 'last_name', 'patronymic'):
                setattr(decl.general, field,
                        replace_apostrophes(getattr(decl.general, field)))

            name_patronymic_last = ' '.join([
                decl.general.name,
                decl.general.patronymic,
                decl.general.last_name,
            ])
            name_last = ' '.join([decl.general.name, decl.general.last_name])
            decl.general.full_name_suggest = {
                'input': [
                    decl.general.full_name,
                    name_patronymic_last,
                    name_last,
                ],
            }

            # Blank the old full-text source in the snapshot before filtering.
            decl_dct["ft_src"] = ""
            interesting_terms = filter_only_interesting(decl_dct)
            decl.ft_src = "\n".join(interesting_terms)

            decl.general.full_name_for_sorting = keyword_for_sorting(
                decl.general.full_name)
            decl.index_card = concat_fields(decl_dct,
                                            Declaration.INDEX_CARD_FIELDS)

            # (last name, first name, patronymic, relation); the declarant
            # comes first with no relation.
            name_tuples = [(decl.general.last_name, decl.general.name,
                            decl.general.patronymic, None)]
            for relative in decl.general.family:
                last, first, patronymic, _ = parse_fullname(
                    relative.family_name)
                name_tuples.append(
                    (last, first, patronymic, relative.relations))

            persons = set()
            names_autocomplete = set()
            for last, first, patronymic, relation in name_tuples:
                persons |= generate_all_names(
                    last, first, patronymic, relation)
                names_autocomplete |= autocomplete_suggestions(
                    concat_name(last, first, patronymic))

            # Drop falsy entries before storing.
            decl.persons = [person for person in persons if person]
            decl.names_autocomplete = [
                suggestion for suggestion in names_autocomplete if suggestion
            ]

            decl.save()
示例#4
0
    def handle(self, *args, **options):
        """Normalise names and rebuild ``ft_src`` for every declaration.

        Each declaration from a match_all scan is announced with ``print``,
        gets apostrophes replaced in its four name fields, has
        ``full_name_suggest`` and ``ft_src`` recomputed, and is saved.
        """
        for decl in Declaration.search().query('match_all').scan():
            print('Processing decl for {}'.format(decl.general.full_name))

            # Normalise in a fixed order: full name first, then its parts.
            for field in ('full_name', 'name', 'last_name', 'patronymic'):
                setattr(decl.general, field,
                        replace_apostrophes(getattr(decl.general, field)))

            name_patronymic_last = ' '.join([
                decl.general.name,
                decl.general.patronymic,
                decl.general.last_name,
            ])
            name_last = ' '.join([decl.general.name, decl.general.last_name])
            decl.general.full_name_suggest = {
                'input': [
                    decl.general.full_name,
                    name_patronymic_last,
                    name_last,
                ],
            }

            interesting_terms = filter_only_interesting(decl.to_dict())
            decl.ft_src = "\n".join(interesting_terms)

            decl.save()
示例#5
0
def save_search(request):
    """Save the search described by the request's GET parameters.

    The query text (``q``) has apostrophes replaced and is stripped;
    ``deepsearch`` is coerced to a bool. The remaining GET parameters, minus
    ``q``, ``deepsearch``, ``format``, ``page`` and ``sort``, are url-encoded
    and handed to ``do_save_search`` together with the query.

    Returns a plain ``'OK'`` response for AJAX requests, otherwise whatever
    ``do_save_search`` returned.
    """
    raw_query = request.GET.get("q", "")
    query = replace_apostrophes(raw_query).strip()
    deepsearch = bool(request.GET.get("deepsearch", ""))

    # Work on a mutable copy; request.GET itself is left untouched.
    extra_params = request.GET.copy()
    for excluded in ("q", "deepsearch", "format", "page", "sort"):
        extra_params.pop(excluded, None)

    # The search is saved regardless of how the request was made.
    saved_response = do_save_search(
        request, query, deepsearch, extra_params.urlencode())

    return HttpResponse('OK') if request.is_ajax() else saved_response
示例#6
0
def save_search(request):
    """Create a ``SearchTask`` for the current user from GET parameters.

    The request is rejected with a warning message and a redirect when the
    normalised query is shorter than 2 or longer than 150 characters, when
    the user has no email (redirects to ``edit_email``), or when an identical
    non-deleted task already exists. Otherwise the task is saved, run for the
    first time, a notification is attempted, and the user is redirected to
    ``search_list`` with a success message.
    """
    raw_query = request.GET.get("q", "")
    query = replace_apostrophes(raw_query).strip()
    deepsearch = bool(request.GET.get("deepsearch", ""))

    # Pick the length complaint, if any; too-short is checked first.
    query_length = len(query)
    length_problem = None
    if query_length < 2:
        length_problem = 'Не вдалось створити завдання з пустим запитом.'
    elif query_length > 150:
        length_problem = 'Не вдалось створити завдання з таким довгим запитом.'

    if length_problem is not None:
        messages.warning(request, length_problem)
        return redirect('search_list')

    if not request.user.email:
        no_email_text = (
            'Не вдалось створити завдання без адреси електронної пошти. ' +
            'Спочатку введіть адресу.')
        messages.warning(request, no_email_text)
        # Send the user back here once an address has been entered.
        edit_email_url = reverse_qs(
            'edit_email', qs={'next': request.get_full_path()})
        return redirect(edit_email_url)

    # don't add twice
    existing_tasks = SearchTask.objects.filter(
        user=request.user,
        query=query,
        deepsearch=deepsearch,
        is_deleted=False,
    )
    if existing_tasks.exists():
        messages.warning(request, 'Таке завдання вже існує.')
        return redirect('search_list')

    task = SearchTask(user=request.user, query=query, deepsearch=deepsearch)
    task.save()

    first_run(task)
    notified = send_newtask_notify(task)
    if not notified:
        messages.warning(
            request,
            'Не вдалось відправити лист на адресу %s' % task.user.email)

    messages.success(request, 'Завдання "%s" створено.' % task.query)
    return redirect('search_list')
示例#7
0
    def _parse_me(cls, base_fname):
        """Build the document for one NACP declaration from its two files.

        Reads ``<base_fname>.json`` and ``<base_fname>.html`` and combines
        them into the dict that is passed to ``NACPDeclaration``.

        :param base_fname: path of the declaration files without extension.
        :returns: ``NACPDeclaration(**resp).to_dict(True)``, or ``None`` when
            either file is missing or raises ``ValueError`` while being read.
        :raises BadHTMLData: when the lower-cased HTML contains any of
            ``cls.dangerous_chunks``.
        :raises BadJSONData: when the JSON has no ``data`` key, or ``data``
            has no ``step_0`` key.
        """
        json_fname = "{}.json".format(base_fname)
        html_fname = "{}.html".format(base_fname)
        resp = {
            "intro": {},
            "declaration": {}
        }

        try:
            with open(json_fname, "r") as fp:
                data = json.load(fp)

            with open(html_fname, "r") as fp:
                raw_html = fp.read()
                html = Selector(raw_html)
        except ValueError:
            print(
                "File {} or it's HTML counterpart cannot be parsed".format(json_fname))
            return None
        except FileNotFoundError:
            print(
                "File {} or it's HTML counterpart cannot be found".format(json_fname))
            return None

        # Read the envelope fields before ``data`` is narrowed to the payload.
        id_ = data.get("id")
        created_date = data.get("created_date")

        raw_html_lowered = raw_html.lower()
        for chunk in cls.dangerous_chunks:
            if chunk in raw_html_lowered:
                raise BadHTMLData("Dangerous fragment found: {}, {}".format(
                    id_, base_fname))

        try:
            data = data["data"]
        except KeyError:
            raise BadJSONData("API brainfart: {}, {}".format(id_, base_fname))

        if "step_0" not in data:
            raise BadJSONData("Bad header format: {}, {}".format(id_, base_fname))

        step_0 = data["step_0"]

        resp["_id"] = "nacp_{}".format(id_)
        resp["ft_src"] = "\n".join(cls.extract_textual_data(html))
        resp["nacp_orig"] = data
        resp["declaration"]["url"] = "https://public.nazk.gov.ua/declaration/{}".format(id_)
        resp["declaration"]["source"] = "NACP"
        resp["declaration"]["basename"] = os.path.basename(base_fname)

        resp["intro"]["corrected"] = id_ in cls.corrected
        resp["intro"]["date"] = cls.parse_date(created_date)

        if "declarationType" not in step_0 or "changesYear" in step_0:
            resp["intro"]["doc_type"] = "Форма змін"

            if "changesYear" in step_0:
                resp["intro"]["declaration_year"] = int(step_0["changesYear"])
        else:
            resp["intro"]["doc_type"] = cls.declaration_types[step_0["declarationType"]]
            if "declarationYearTo" in step_0:
                resp["intro"]["declaration_year_to"] = cls.parse_date(step_0["declarationYearTo"])

            if "declarationYearFrom" in step_0:
                resp["intro"]["declaration_year_from"] = cls.parse_date(step_0["declarationYearFrom"])
                resp["intro"]["declaration_year"] = resp["intro"]["declaration_year_from"].year

            # Later keys override earlier ones. Empty values are skipped for
            # all three keys so that int("") cannot abort the whole parse.
            for year_key in ("declarationYear1", "declarationYear3", "declarationYear4"):
                if year_key in step_0 and step_0[year_key]:
                    resp["intro"]["declaration_year"] = int(step_0[year_key])

        step_1 = data["step_1"]
        last_name = title(step_1["lastname"])
        first_name = title(step_1["firstname"])
        middle_name = title(step_1["middlename"])

        resp["general"] = {
            "last_name": replace_apostrophes(last_name),
            "name": replace_apostrophes(first_name),
            "patronymic": replace_apostrophes(middle_name),
            "full_name": replace_apostrophes("{} {} {}".format(
                last_name,
                first_name,
                middle_name,
            )),
            "post": {
                "post": replace_apostrophes(step_1.get("workPost", "")),
                "post_type": replace_apostrophes(step_1.get("postType", "")),
                "office": replace_apostrophes(step_1.get("workPlace", "")),
                "actual_region": replace_apostrophes(cls.region_types.get(step_1.get("actual_region", ""), "")),
                "region": replace_apostrophes(cls.region_types.get(step_1.get("region", ""), "")),
            }
        }

        if "step_2" in data:
            family = data["step_2"]

            # Only a dict of members is processed; other shapes are ignored.
            if isinstance(family, dict):
                resp["general"]["family"] = []

                for member in family.values():
                    if not isinstance(member, dict):
                        continue

                    resp["general"]["family"].append({
                        "family_name": replace_apostrophes("{} {} {}".format(
                            title(member.get("lastname", "")),
                            title(member.get("firstname", "")),
                            title(member.get("middlename", "")),
                        )),

                        "relations": member.get("subjectRelation", "")
                    })

        # get regions from estate list
        # step_3 and step_4 are handled identically: each non-empty dict
        # contributes the mapped region of every entry that has one.
        for step_key in ("step_3", "step_4"):
            entries = data.get(step_key)
            if not (isinstance(entries, dict) and entries):
                continue

            estates = resp.setdefault("estate", [])
            for estate in entries.values():
                if isinstance(estate, dict) and "region" in estate:
                    region = replace_apostrophes(cls.region_types.get(estate["region"], ""))
                    if region:
                        estates.append({"region": region})

        # HTML-derived regions are only added when step_3/step_4 were present.
        if "estate" in resp:
            estate_list = html.css(
                "table:contains('Місцезнаходження') td:contains('Населений пункт') span::text"
            ).extract()

            for estate in estate_list:
                region = cls.decode_region(estate)
                if region:
                    resp["estate"].append({"region": region})

        resp['general']['full_name_suggest'] = [
            {
                'input': resp['general']['full_name'],
                'weight': 5
            },
            {
                'input': ' '.join(
                    [
                        resp['general']['name'],
                        resp['general']['patronymic'],
                        resp['general']['last_name']
                    ]
                ),
                'weight': 3
            },
            {
                'input': ' '.join(
                    [
                        resp['general']['name'],
                        resp['general']['last_name']
                    ]
                ),
                'weight': 3
            }
        ]

        resp['general']['full_name_for_sorting'] = keyword_for_sorting(resp['general']['full_name'])

        # Fall back to the HTML for any region the JSON did not provide.
        post = resp["general"]["post"]
        region_selectors = (
            ("region",
             "fieldset:contains('Зареєстроване місце проживання') .person-info:contains('Місто')::text"),
            ("actual_region",
             "fieldset:contains('Місце фактичного проживання') .person-info:contains('Місто')::text"),
        )
        for field, selector in region_selectors:
            if post[field]:
                continue
            region_html = html.css(selector).extract()
            if len(region_html) > 1:
                post[field] = cls.decode_region(region_html[1])

        # if set only one region use it value for second one
        if not post["actual_region"] and post["region"]:
            post["actual_region"] = post["region"]
        elif not post["region"] and post["actual_region"]:
            post["region"] = post["actual_region"]

        resp["index_card"] = concat_fields(resp, NACPDeclaration.INDEX_CARD_FIELDS)

        return NACPDeclaration(**resp).to_dict(True)
示例#8
0
    def _parse_me(cls, base_fname):
        """Build the document for one NACP declaration from its two files.

        Reads ``<base_fname>.json`` and ``<base_fname>.html`` and combines
        them into the dict that is passed to ``NACPDeclaration``.

        :param base_fname: path of the declaration files without extension.
        :returns: ``NACPDeclaration(**resp).to_dict(True)``, or ``None`` when
            either file is missing or raises ``ValueError`` while being read.
        :raises BadHTMLData: when the lower-cased HTML contains any of
            ``cls.dangerous_chunks``.
        :raises BadJSONData: when the JSON has no ``data`` key, or ``data``
            has no ``step_0`` key.
        """
        json_fname = "{}.json".format(base_fname)
        html_fname = "{}.html".format(base_fname)
        resp = {"intro": {}, "declaration": {}}

        try:
            with open(json_fname, "r") as fp:
                data = json.load(fp)

            with open(html_fname, "r") as fp:
                raw_html = fp.read()
                html = Selector(raw_html)
        except ValueError:
            print("File {} or it's HTML counterpart cannot be parsed".format(
                json_fname))
            return None
        except FileNotFoundError:
            print("File {} or it's HTML counterpart cannot be found".format(
                json_fname))
            return None

        # Read the envelope fields before ``data`` is narrowed to the payload.
        id_ = data.get("id")
        created_date = data.get("created_date")

        raw_html_lowered = raw_html.lower()
        for chunk in cls.dangerous_chunks:
            if chunk in raw_html_lowered:
                raise BadHTMLData("Dangerous fragment found: {}, {}".format(
                    id_, base_fname))

        try:
            data = data["data"]
        except KeyError:
            raise BadJSONData("API brainfart: {}, {}".format(id_, base_fname))

        if "step_0" not in data:
            raise BadJSONData("Bad header format: {}, {}".format(
                id_, base_fname))

        step_0 = data["step_0"]

        resp["_id"] = "nacp_{}".format(id_)
        resp["nacp_src"] = "\n".join(cls.extract_textual_data(html))
        resp["nacp_orig"] = data
        resp["declaration"][
            "url"] = "https://public.nazk.gov.ua/declaration/{}".format(id_)
        resp["declaration"]["source"] = "NACP"
        resp["declaration"]["basename"] = os.path.basename(base_fname)

        resp["intro"]["corrected"] = id_ in cls.corrected
        resp["intro"]["date"] = cls.parse_date(created_date)

        if "declarationType" not in step_0 or "changesYear" in step_0:
            resp["intro"]["doc_type"] = "Форма змін"

            if "changesYear" in step_0:
                resp["intro"]["declaration_year"] = int(step_0["changesYear"])
        else:
            resp["intro"]["doc_type"] = cls.declaration_types[
                step_0["declarationType"]]
            if "declarationYearTo" in step_0:
                resp["intro"]["declaration_year_to"] = cls.parse_date(
                    step_0["declarationYearTo"])

            if "declarationYearFrom" in step_0:
                resp["intro"]["declaration_year_from"] = cls.parse_date(
                    step_0["declarationYearFrom"])
                resp["intro"]["declaration_year"] = resp["intro"][
                    "declaration_year_from"].year

            # Later keys override earlier ones. A key that is present but
            # empty is skipped, so that int("") cannot abort the whole parse.
            for year_key in ("declarationYear1", "declarationYear3",
                             "declarationYear4"):
                if year_key in step_0 and step_0[year_key]:
                    resp["intro"]["declaration_year"] = int(step_0[year_key])

        step_1 = data["step_1"]
        resp["general"] = {
            "last_name":
            replace_apostrophes(title(step_1["lastname"])),
            "name":
            replace_apostrophes(title(step_1["firstname"])),
            "patronymic":
            replace_apostrophes(title(step_1["middlename"])),
            "full_name":
            replace_apostrophes("{} {} {}".format(
                title(step_1["lastname"]),
                title(step_1["firstname"]),
                title(step_1["middlename"]),
            )),
            "post": {
                "post":
                replace_apostrophes(step_1.get("workPost", "")),
                "office":
                replace_apostrophes(step_1.get("workPlace", "")),
                # The post region is taken from the ``actual_region`` field.
                "region":
                replace_apostrophes(
                    cls.region_types.get(
                        step_1.get("actual_region", ""), "")),
            }
        }

        if "step_2" in data:
            family = data["step_2"]

            # Only a dict of members is processed; other shapes are ignored.
            if isinstance(family, dict):
                resp["general"]["family"] = []

                for member in family.values():
                    if not isinstance(member, dict):
                        continue

                    resp["general"]["family"].append({
                        "family_name":
                        replace_apostrophes("{} {} {}".format(
                            title(member.get("lastname", "")),
                            title(member.get("firstname", "")),
                            title(member.get("middlename", "")),
                        )),
                        "relations":
                        member.get("subjectRelation", "")
                    })

        resp['general']['full_name_suggest'] = [{
            'input':
            resp['general']['full_name'],
            'weight':
            5
        }, {
            'input':
            ' '.join([
                resp['general']['name'], resp['general']['patronymic'],
                resp['general']['last_name']
            ]),
            'weight':
            3
        }, {
            'input':
            ' '.join([resp['general']['name'], resp['general']['last_name']]),
            'weight':
            3
        }]

        post = resp["general"]["post"]

        # Fall back to the HTML when the JSON gave no region: take the
        # second-to-last "/"-separated chunk of the second matched text node.
        if not post["region"]:
            region_html = html.css(
                "fieldset:contains('Зареєстроване місце проживання') .person-info:contains('Місто')::text"
            ).extract()
            if len(region_html) > 1:
                chunks = region_html[1].split("/")
                if len(chunks) > 1:
                    post["region"] = chunks[-2].strip()

        # Keep only regions known to ``cls.region_mapping``; blank the rest.
        region_key = post["region"].lower()
        if region_key in cls.region_mapping:
            post["region"] = cls.region_mapping[region_key]
        else:
            post["region"] = ""

        return NACPDeclaration(**resp).to_dict(True)