def to_dict(self):
    """Build the dict representation of this procurement record.

    The result starts with ``_id``, ``details_url`` and the two dataset
    timestamps, is overlaid with every key of ``self.data``, and finally gets
    the derived ``companies``, ``addresses``, ``persons``,
    ``names_autocomplete`` and ``raw_records`` lists (falsy entries removed).

    Returns:
        dict: the assembled document.
    """
    dt = self.data
    purchase = dt["purchase"]
    buyer = purchase["buyer"]
    seller = dt["seller"]

    res = {
        "_id": self.pk,
        "details_url": "https://z.texty.org.ua/deal/{}".format(dt["id"]),
        "last_updated_from_dataset": self.last_updated_from_dataset,
        "first_updated_from_dataset": self.first_updated_from_dataset,
    }

    companies = (
        set([buyer["name"], buyer["name_en"], seller["name"]])
        | generate_edrpou_options(buyer["code"])
        | generate_edrpou_options(seller["code"])
        | generate_edrpou_options(purchase["cost_dispatcher_code"])
    )

    addresses = set(
        [
            seller["address"],
            seller["address_full"],
            buyer["address"],
            buyer["address_en"],
        ]
    )

    persons = set()
    if buyer["person"]:
        persons |= parse_and_generate(buyer["person"], "Представник замовника")

    raw_records = set([purchase["goods_name"], purchase["goods_name_short"]])

    # Work on a copy: ``|=`` mutates a set in place, so aliasing ``companies``
    # here would leak person-name suggestions into the companies list.
    names_autocomplete = set(companies)
    if buyer["person"]:
        names_autocomplete |= autocomplete_suggestions(buyer["person"])

    res.update(dt)
    res.update(
        {
            "companies": list(filter(None, companies)),
            "addresses": list(filter(None, addresses)),
            "persons": list(filter(None, persons)),
            "names_autocomplete": list(filter(None, names_autocomplete)),
            "raw_records": list(filter(None, raw_records)),
        }
    )

    return res
def to_dict(self):
    """Build the dict representation of this donation record.

    The result holds ``_id``, every key of ``self.data``, the derived
    ``companies``, ``addresses``, ``persons``, ``names_autocomplete`` and
    ``raw_records`` lists (falsy entries removed), plus ``type``, ``period``
    and ``ultimate_recepient`` taken from the instance.

    A donor with a truthy ``donator_code`` is recorded as a company;
    otherwise the donor name is parsed as a person.

    Returns:
        dict: the assembled document.
    """
    record = self.data
    doc = {"_id": self.pk}
    suggestions = set()

    firms = (
        {record["party"]}
        | generate_edrpou_options(record["donator_code"])
        | generate_edrpou_options(record["party"])
    )

    # Optional branch fields contribute only when present and truthy.
    for branch_key in ("branch_code", "branch_name"):
        if record.get(branch_key):
            firms |= generate_edrpou_options(record[branch_key])

    locations = {record["donator_location"]}
    people = {record.get("candidate_name")}

    if record["donator_code"]:
        firms |= {record["donator_name"]}
    else:
        people |= parse_and_generate(record["donator_name"], "Донор")
        suggestions |= autocomplete_suggestions(record["donator_name"])

    suggestions |= firms

    raw = {
        record.get("account_number"),
        record.get("payment_subject"),
        record["transaction_doc_number"],
    }

    doc.update(record)
    doc.update(
        {
            "companies": [item for item in firms if item],
            "addresses": [item for item in locations if item],
            "persons": [item for item in people if item],
            "names_autocomplete": [item for item in suggestions if item],
            "raw_records": [item for item in raw if item],
            "type": self.get_type_display(),
            "period": self.period,
            "ultimate_recepient": self.ultimate_recepient,
        }
    )

    return doc
def to_dict(self):
    """Build the dict representation of this company / tax-office record.

    The result holds ``_id``, the two dataset timestamps, every key of
    ``self.data`` and the derived ``companies``, ``raw_records``,
    ``addresses``, ``persons`` and ``names_autocomplete`` lists (falsy entries
    removed). No persons are extracted, so ``persons`` is always empty.

    Returns:
        dict: the assembled document.
    """
    record = self.data
    doc = {
        "_id": self.pk,
        "last_updated_from_dataset": self.last_updated_from_dataset,
        "first_updated_from_dataset": self.first_updated_from_dataset,
    }

    people = set()
    firms = (
        set()
        | generate_edrpou_options(record["company_edrpou"])
        | deal_with_mixed_lang(record["company_name"])
        | {record["company_reg_no"]}
        | deal_with_mixed_lang(record["tax_office_name"])
    )
    locations = {record["company_address"]}
    raw = {record["tax_office_code"]}
    suggestions = firms | people

    doc.update(record)
    doc.update(
        {
            "companies": [item for item in firms if item],
            "raw_records": [item for item in raw if item],
            "addresses": [item for item in locations if item],
            "persons": [item for item in people if item],
            "names_autocomplete": [item for item in suggestions if item],
        }
    )

    return doc
def to_dict(self):
    """Build the dict representation of this construction-related record.

    The result holds ``_id``, every key of ``self.data`` and the derived
    ``persons``, ``companies``, ``addresses``, ``names_autocomplete`` and
    ``raw_records`` lists (falsy entries removed).

    Companies come from the ``customer``, ``designer`` and ``contractor``
    fields plus any code ``parse_edrpou`` extracts from them. The address is
    whatever follows the first ``;`` in ``obj``. The ``tech_oversee`` field is
    split into pieces that go either to name suggestions or to raw records.

    Returns:
        dict: the assembled document.
    """
    record = self.data
    doc = {"_id": self.pk}

    suggestions = set()
    locations = set()
    raw = {
        record["obj"],
        record["land_plot_info"],
        record["number"],
        record["tech_oversee"],
    }
    people = {record["authors_oversee"]}

    party_keys = ("customer", "designer", "contractor")
    firms = {record[key].strip(" 0") for key in party_keys}
    for key in party_keys:
        code = parse_edrpou(record[key])
        if code:
            firms |= generate_edrpou_options(code)

    if ";" in record["obj"]:
        # Everything after the first semicolon is kept as the address.
        normalized = record["obj"].replace("\xa0", " ")
        locations = {normalized.split(";", 1)[1]}

    suggestions |= firms

    overseer = record["tech_oversee"]
    if overseer:
        found = re.search(r"\d{2,}(\s*.*)", overseer)
        if found:
            tail = found.group(1).replace(";", ",")
            for piece in tail.split(","):
                # Pieces without a 2+ digit run are parsed as names; the rest
                # are kept verbatim as raw records.
                if re.search(r"\d{2,}", piece) is None:
                    suggestions |= parse_and_generate(piece, "технічний нагляд")
                else:
                    raw.add(piece)

    doc.update(record)
    doc.update(
        {
            "persons": [item for item in people if item],
            "companies": [
                firm
                for firm in firms
                if firm.lower() != "фізична особа" and firm
            ],
            "addresses": [item for item in locations if item],
            "names_autocomplete": [item for item in suggestions if item],
            "raw_records": [item for item in raw if item],
        }
    )

    return doc
def search_clause(self, donator_code):
    """Search EDRDR records for the given code that carry a Crimea flag.

    Args:
        donator_code: code expanded through ``generate_edrpou_options`` and
            matched against ``full_edrpou``.

    Returns:
        The executed search response, limited to records where either
        ``internals.flags.has_bo_in_crimea`` or
        ``internals.flags.has_founders_in_crimea`` is true.
    """
    search = ElasticEDRDRModel.search()
    search = search.filter(
        "terms", full_edrpou=list(generate_edrpou_options(donator_code))
    )

    crimea_flag = Q("term", internals__flags__has_bo_in_crimea=True) | Q(
        "term", internals__flags__has_founders_in_crimea=True
    )
    search = search.query("bool", filter=[crimea_flag])

    return search.execute()
def get_search(self, donation):
    """Return procurement-winner hits for the donor of ``donation``.

    Args:
        donation: mapping with a ``donator_code`` entry.

    Returns:
        ``None`` when ``self.parse_code`` yields ``None``; otherwise the
        executed search response (up to 200 hits, newest ``date`` first).
        Responses are memoized in ``self.cache`` by parsed code.
    """
    code = self.parse_code(donation["donator_code"])
    if code is None:
        return None

    if code in self.cache:
        return self.cache[code]

    search = ElasticProcurementWinnersModel.search().filter(
        "terms", seller__code=list(generate_edrpou_options(code))
    )
    hits = search.sort("-date")[:200].execute()
    self.cache[code] = hits
    return hits
def get_search(self, donation):
    """Return tax-debt hits for the donor of ``donation``.

    Args:
        donation: mapping with a ``donator_code`` entry.

    Returns:
        ``None`` when ``self.parse_code`` yields ``None``; otherwise the
        executed search response (up to 200 hits, sorted by
        ``first_updated_from_dataset`` descending). Responses are memoized in
        ``self.cache`` by parsed code.
    """
    code = self.parse_code(donation["donator_code"])
    if code is None:
        return None

    if code in self.cache:
        return self.cache[code]

    search = ElasticTaxDebtsModel.search().filter(
        "terms", TIN_S=list(generate_edrpou_options(code))
    )
    hits = search.sort("-first_updated_from_dataset")[:200].execute()
    self.cache[code] = hits
    return hits
def get_search(self, donation):
    """Return EDRDR hits for the donor whose latest status is not "registered".

    Args:
        donation: mapping with a ``donator_code`` entry.

    Returns:
        ``None`` when ``self.parse_code`` yields ``None``; otherwise the
        executed search response for records matching the donor code, minus
        those whose ``latest_record.status`` is ``"зареєстровано"`` and minus
        the record with ``full_edrpou`` 20055032. Responses are memoized in
        ``self.cache`` by parsed code.
    """
    code = self.parse_code(donation["donator_code"])
    if code is None:
        return None

    if code in self.cache:
        return self.cache[code]

    search = ElasticEDRDRModel.search().filter(
        "terms", full_edrpou=list(generate_edrpou_options(code))
    )
    search = search.exclude("term", latest_record__status="зареєстровано")
    # 20055032 is marked "ДКСУ" in the original source
    # (presumably the State Treasury Service of Ukraine).
    search = search.exclude("term", full_edrpou=20055032)

    hits = search.execute()
    self.cache[code] = hits
    return hits
def get_search(self, donation):
    """Return EDRDR hits whose flagged date falls near the donation date.

    Args:
        donation: mapping with ``donator_code`` and ``donation_date`` entries.

    Returns:
        ``None`` when ``self.parse_code`` yields ``None``; otherwise the
        executed search response for records matching the donor code whose
        ``self.edrdr_flag`` field lies between
        ``donation_date - self.change_minus_delta`` and
        ``donation_date + self.change_plus_delta`` (inclusive).
    """
    code = self.parse_code(donation["donator_code"])
    if code is None:
        return None

    donated_on = parse_dt(donation["donation_date"]).date()

    search = ElasticEDRDRModel.search().filter(
        "terms", full_edrpou=list(generate_edrpou_options(code))
    )
    window = {
        "gte": donated_on - self.change_minus_delta,
        "lte": donated_on + self.change_plus_delta,
    }
    return search.query("range", **{self.edrdr_flag: window}).execute()
def to_dict(self):
    """Build the dict representation of this tax-debt record.

    The result holds ``_id``, the two dataset timestamps, every key of
    ``self.data`` and the derived ``companies``, ``addresses``, ``persons``
    and ``names_autocomplete`` lists (falsy entries removed). No addresses
    are extracted, so ``addresses`` is always empty.

    When ``TIN_S`` is truthy, ``NAME`` is treated as a company and ``PIB`` as
    a person; otherwise ``NAME`` itself is parsed as a person.

    Returns:
        dict: the assembled document.
    """
    record = self.data
    doc = {
        "_id": self.pk,
        "last_updated_from_dataset": self.last_updated_from_dataset,
        "first_updated_from_dataset": self.first_updated_from_dataset,
    }

    firms = set()
    locations = set()
    people = set()

    if record["TIN_S"]:
        firms |= deal_with_mixed_lang(record["NAME"])
        firms |= generate_edrpou_options(record["TIN_S"])
        people |= parse_and_generate(record["PIB"], "боржник")
    else:
        people |= parse_and_generate(record["NAME"], "боржник")

    # The DPI fields are processed regardless of the branch above.
    firms |= deal_with_mixed_lang(record["DPI"])
    people |= parse_and_generate(record["DPI_BOSS"], "керівник податкової")

    suggestions = (
        firms
        | autocomplete_suggestions(record["NAME"])
        | autocomplete_suggestions(record["PIB"])
        | autocomplete_suggestions(record["DPI_BOSS"])
    )

    doc.update(record)
    doc.update(
        {
            "companies": [item for item in firms if item],
            "addresses": [item for item in locations if item],
            "persons": [item for item in people if item],
            "names_autocomplete": [item for item in suggestions if item],
        }
    )

    return doc
def company_entity(
    name,
    code,
    id_prefix="",
    jurisdiction="Ukraine",
    entity_schema="RingCompany",
    **kwargs
):
    """Create a company entity through ``ftm_model``.

    Args:
        name: value for the ``name`` property.
        code: registration code; for the ``"Ukraine"`` jurisdiction it is
            expanded into aliases and normalized with ``format_edrpou``.
        id_prefix: first component passed to ``make_id``.
        jurisdiction: value for the ``jurisdiction`` property.
        entity_schema: schema name passed to ``ftm_model.make_entity``.
        **kwargs: extra property/value pairs set on the entity verbatim.

    Returns:
        The populated entity.
    """
    entity = ftm_model.make_entity(entity_schema)

    registration_number = code
    if jurisdiction == "Ukraine":
        # NOTE(review): both statements are assumed to belong to this branch;
        # the collapsed source does not show the nesting explicitly.
        entity.set("alias", generate_edrpou_options(code))
        registration_number = format_edrpou(code)

    entity.set("jurisdiction", jurisdiction)
    entity.set("name", name)
    entity.set("registrationNumber", registration_number)
    entity.make_id(id_prefix, registration_number)

    for prop, value in kwargs.items():
        entity.set(prop, value)

    return entity
def to_dict(self):
    """Build the dict representation of this record.

    The result holds ``_id``, every key of ``self.data`` and the derived
    ``companies``, ``addresses`` and ``names_autocomplete`` lists (falsy
    entries removed). ``names_autocomplete`` has the same content as
    ``companies``.

    Returns:
        dict: the assembled document.
    """
    record = self.data
    doc = {"_id": self.pk}

    firms = {record["obj"]}
    firms |= generate_edrpou_options(record["edrpou"])
    locations = {record["address"]}

    doc.update(record)
    doc.update(
        {
            "companies": [item for item in firms if item],
            "addresses": [item for item in locations if item],
            "names_autocomplete": [item for item in firms if item],
        }
    )

    return doc
def search_clause(self, donator_code):
    """Search EDRDR records for the given code where ``self.edrdr_flag`` is true.

    Args:
        donator_code: code expanded through ``generate_edrpou_options`` and
            matched against ``full_edrpou``.

    Returns:
        The executed search response, capped at 200 hits.
    """
    search = ElasticEDRDRModel.search()
    search = search.filter(
        "terms", full_edrpou=list(generate_edrpou_options(donator_code))
    )
    search = search.filter("term", **{self.edrdr_flag: True})
    return search[:200].execute()
def handle(self, *args, **options):
    """Export procurement records matching the input values to an XLSX file.

    Values are read from ``options["infile"]`` in batches of
    ``options["batch_size"]``. Each value becomes one ``terms`` filter on
    ``options["field"]`` against ``ElasticProcurementWinnersModel``; all-digit
    values are first expanded with ``generate_edrpou_options``. Every hit is
    written as one row of the workbook created at ``options["outfile"]``.
    """
    outfile = Workbook(options["outfile"], {"remove_timezone": True})
    worksheet = outfile.add_worksheet("Закупівлі")

    # The cell format never changes, so create it once instead of
    # registering a new format object repeatedly inside the batch loop.
    currency_format = outfile.add_format({"num_format": "#,##0.00 ₴"})
    company_url = "https://ring.org.ua/edr/uk/company/{}"

    headers = (
        "Замовник",
        "Код замовника",
        "Переможець",
        "Код переможець",
        "Предмет закупівлі",
        "Дата закупівлі",
        "Рік",
        "Очікувана сума",
        "Актуальна сума",
    )
    curr_line = 0
    for column, caption in enumerate(headers):
        worksheet.write(curr_line, column, caption)

    with tqdm() as pbar:
        for chunk in grouper(options["infile"], options["batch_size"]):
            # Drop falsy entries (presumably the padding grouper adds to the
            # last batch — confirm against grouper's implementation).
            chunk = list(filter(None, chunk))
            if not chunk:
                continue

            requests = MultiSearch()
            for raw_value in chunk:
                # Strip whitespace and U+200E (LEFT-TO-RIGHT MARK) characters.
                value = raw_value.strip().strip("\u200e")
                if value.isdigit():
                    search = list(generate_edrpou_options(value))
                else:
                    search = [value]

                requests = requests.add(
                    ElasticProcurementWinnersModel.search().filter(
                        "terms", **{options["field"]: search}
                    )
                )

            for response in requests.execute():
                for line in response:
                    curr_line += 1
                    buyer = line["purchase"]["buyer"]
                    seller = line["seller"]

                    worksheet.write(curr_line, 0, buyer["name"])
                    worksheet.write_url(
                        curr_line,
                        1,
                        company_url.format(buyer["code"]),
                        string=format_edrpou(buyer["code"]),
                    )
                    worksheet.write(curr_line, 2, seller["name"])
                    worksheet.write_url(
                        curr_line,
                        3,
                        company_url.format(seller["code"]),
                        string=format_edrpou(seller["code"]),
                    )
                    worksheet.write(curr_line, 4, line["purchase"]["goods_name"])
                    worksheet.write(curr_line, 5, date_filter(line["date"]))
                    worksheet.write(curr_line, 6, line["date"].year)

                    # The expected amount is optional; the actual one is not.
                    if "expected_volume" in line:
                        worksheet.write(
                            curr_line, 7, line["expected_volume"], currency_format
                        )
                    worksheet.write(curr_line, 8, line["volume_uah"], currency_format)

            pbar.update(len(chunk))

    outfile.close()
def to_dict(self):
    """Build the dict representation of this report.

    The result holds ``_id``, ``report_id``, ``timestamp``, the two dataset
    timestamps, ``detailed_title`` (when a title is found), ``report_title``,
    ``associates``, ``dismissed_associates`` and the derived ``companies``,
    ``addresses``, ``persons`` and ``names_autocomplete`` lists (falsy entries
    removed).

    Date fields (``STD``/``FID`` on the report title and ``DAT_PASP`` on each
    associate) are parsed in place with ``dt_parse``, so the objects found in
    ``self.data`` are modified.

    Returns:
        dict | None: the assembled document, or ``None`` when
        ``self.title3_jmespath`` finds no report title.
    """
    record = self.data
    doc = {
        "_id": self.pk,
        "report_id": record["report"]["id"],
        "timestamp": dt_parse(record["report"]["timestamp"]),
        "last_updated_from_dataset": self.last_updated_from_dataset,
        "first_updated_from_dataset": self.first_updated_from_dataset,
    }

    suggestions = set()
    firms = set()
    people = set()
    locations = set()

    title1 = self.title1_jmespath.search(record)
    title2 = self.title2_jmespath.search(record)
    report_titles = self.title3_jmespath.search(record)

    if not report_titles:
        return None

    report_title = report_titles[0]
    for date_key in ("STD", "FID"):
        report_title[date_key] = dt_parse(report_title[date_key])

    titles = title1 + title2
    if titles:
        title = titles[0]
        address_keys = ("E_CONT", "E_ADRES", "E_POST", "E_RAYON", "E_STREET")
        address_parts = [title.get(key) for key in address_keys]
        locations.add(", ".join(part for part in address_parts if part))

        doc["detailed_title"] = title
        firms |= deal_with_mixed_lang(title.get("E_NAME"))

        if title.get("FIO_PODP"):
            for signer in deal_with_mixed_lang(title["FIO_PODP"]):
                people |= parse_and_generate(signer, title["POS_PODP"] or "")
                suggestions |= autocomplete_suggestions(signer)

    doc["report_title"] = report_title
    firms |= generate_edrpou_options(report_title.get("D_EDRPOU"))
    firms |= deal_with_mixed_lang(report_title.get("D_NAME"))

    associates = self.current_persons_jmespath.search(record)
    dismissed_associates = self.fired_persons_jmespath.search(record)
    doc["associates"] = associates
    doc["dismissed_associates"] = dismissed_associates

    for assoc in associates + dismissed_associates:
        # Make sure the key always exists, parsing the value when present.
        passport_date = assoc.get("DAT_PASP")
        assoc["DAT_PASP"] = dt_parse(passport_date) if passport_date else passport_date

        full_name = assoc.get("P_I_B", "") or ""
        if not full_name.strip():
            continue

        name_words = []
        # TODO: better word splitting
        for word in full_name.split():
            # TODO: better detection of latin
            word = try_to_fix_mixed_charset(word)
            stop_here = (
                is_eng(word)
                or word.startswith("(")
                or word.endswith(")")
                or word in ("", "-")
                or word.startswith("-")
            )
            if stop_here:
                break
            if word:
                name_words.append(word)

        joined = " ".join(name_words)
        if 2 <= len(name_words) <= 3:
            # Two or three leading words: looks like a real person.
            position = assoc.get("POSADA", "") or ""
            people |= parse_and_generate(joined, position)
            suggestions |= autocomplete_suggestions(joined)
            people |= parse_and_generate(full_name, position)
            suggestions |= autocomplete_suggestions(full_name)
        else:
            firms.add(joined)
            firms |= deal_with_mixed_lang(full_name)

    suggestions |= firms

    doc.update(
        {
            "companies": [item for item in firms if item],
            "addresses": [item for item in locations if item],
            "persons": [item for item in people if item],
            "names_autocomplete": [item for item in suggestions if item],
        }
    )

    return doc