Example #1
def dump_orgs():
    # Post-process the raw scrape: split the multi-value rubric fields
    # and pull the registry codes into dedicated fields.
    orgs = load_utf_json(RAW_JSON_FNAME)
    for org in orgs:
        for rubric in [TARGETS, ACTIVITIES, PROJECTS, SERVICES]:
            item = org[rubric]
            if item:
                org[rubric] = str_to_list(item)
        raw_codes = org[CODES]
        if raw_codes:
            codes = str_to_list(raw_codes)
            org[CODES] = codes
            org[OGRN] = find_code(codes, r'ОГРН')
            org[INN] = find_code(codes, r'ИНН')
            org[KPP] = find_code(codes, r'КПП')
            org[OKPO] = find_code(codes, r'ОКПО')
            org[OKATO] = find_code(codes, r'ОКАТО')
        else:
            for key in (OGRN, INN, KPP, OKPO, OKATO):
                org[key] = None
        # Hand-patch the codes of two known problem entries.
        if org[SOURCE] == 'http://nko71.ru/katalog-nko/nko-po-uslugam/sotsialnaya-pomoshch-i-podderzhka/nasledie.html':
            org[OGRN] = '1097100001129'
        if org[SOURCE] == ('http://nko71.ru/katalog-nko/nko-po-gruppam-naseleniya/zhenshchiny-semi-s-detmi/'
                           'soyuz-pravoslavnykh-zhenshchin.html'):
            org[INN] = '7116511663'
            org[KPP] = '711601001'

    dump_utf_json(orgs, JSON_FNAME)
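
The helpers str_to_list and find_code are project code that is not shown here. A minimal sketch of what they might look like, assuming the codes arrive as one delimited string per org and that the raw-string arguments ('ОГРН', 'ИНН', ...) are regex patterns for the code labels:

import re

def str_to_list(raw):
    # Hypothetical: split a semicolon-delimited string into clean items.
    return [item.strip() for item in raw.split(';') if item.strip()]

def find_code(codes, label):
    # Hypothetical: return the digits that follow the given label,
    # e.g. 'ОГРН 1097100001129' -> '1097100001129'.
    for code in codes:
        match = re.search(label + r'\D*(\d+)', code)
        if match:
            return match.group(1)
    return None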
Example #2
def dump_orgs():
    orgs = list()

    # range(2012, 2018) covers the yearly spreadsheets for 2012-2017.
    for year in range(2012, 2018):
        print("Parsing %d..." % year)
        orgs.extend(parse_xls(year))

    dump_utf_json(orgs, JSON_FNAME)
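
parse_xls is defined elsewhere in the project. A minimal sketch under the assumption that there is one .xls file per year (the filename pattern is hypothetical) with a header row followed by one organisation per row:

import xlrd

XLS_FNAME_TMPL = 'orgs_{}.xls'  # hypothetical filename pattern

def parse_xls(year):
    # Map each data row onto the header row, one dict per organisation.
    sheet = xlrd.open_workbook(XLS_FNAME_TMPL.format(year)).sheet_by_index(0)
    header = sheet.row_values(0)
    return [dict(zip(header, sheet.row_values(row)))
            for row in range(1, sheet.nrows)]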
Example #3
def dump_raw_orgs():
    orgs = list()
    urls = load_utf_json(URL_JSON_FNAME)
    for ind, url in enumerate(urls):
        org = scrape_org(url)
        # Progress log: index, organisation name, blank separator line.
        print(ind)
        print(org[ORGNAME])
        print()
        orgs.append(org)
    dump_utf_json(orgs, RAW_JSON_FNAME)
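
scrape_org is also project code. A very rough sketch, assuming requests plus BeautifulSoup and treating the page's <h1> as the organisation name; the real selectors depend entirely on the site's markup:

import requests
from bs4 import BeautifulSoup

def scrape_org(url):
    # Hypothetical: the real parser extracts many more fields.
    soup = BeautifulSoup(requests.get(url).text, 'html.parser')
    return {ORGNAME: soup.find('h1').get_text(strip=True), SOURCE: url}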
Example #4
def add_fields(source_json, target_json=None, **fields):
    # Set every field in **fields on every entry; by default the source
    # file is rewritten in place.
    if not target_json:
        target_json = source_json
    entries = load_utf_json(source_json)
    total = len(entries)
    for count, entry in enumerate(entries, 1):
        # Overwrite the same console line with a progress counter.
        print("\r{} / {}".format(count, total), end='', flush=True)
        for fieldname, val in fields.items():
            entry[fieldname] = val
    print("\nDumping...")
    dump_utf_json(entries, target_json)
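
A hypothetical call, stamping every entry with a constant field and writing the result to a new file (both filenames and the field name are illustrative):

add_fields('orgs.json', target_json='orgs_tagged.json', region='Тульская область')

Because the fields arrive as keyword arguments, this only works for JSON keys that are valid Python identifiers.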
Example #5
def make_json():
    # The 2016 and 2017 tables are laid out differently, hence the
    # different row offsets and the swapped COMMENT/VIOLATIONS roles.
    orgs2016 = scrape(url=URL2016,
                      beg=5,
                      diff=3,
                      field=COMMENT,
                      field_n_a=VIOLATIONS)
    orgs2017 = scrape(url=URL2017,
                      beg=6,
                      diff=2,
                      field=VIOLATIONS,
                      field_n_a=COMMENT)
    orgs_2016_2017 = orgs2016 + orgs2017
    dump_utf_json(orgs_2016_2017, JSON_FNAME)
Example #6
def duplicate_entry(source_json, target_json=None, **fields):
    # Duplicate the first entry whose fields all equal the given values;
    # by default the source file is rewritten in place.
    if not target_json:
        target_json = source_json
    entries = load_utf_json(source_json)
    total = len(entries)
    for count, entry in enumerate(entries, 1):
        print("\r{} / {}".format(count, total), end='', flush=True)
        for fieldname, val in fields.items():
            if entry[fieldname] != val:
                break
        else:
            # Every requested field matched: insert the entry a second
            # time right after itself, dump, and stop.
            entries.insert(count, entry)
            print("\nFound at {}, inserted at {}!".format(count - 1, count))
            print("Dumping...")
            dump_utf_json(entries, target_json)
            return
    print("\nNo such entry!")
Example #7
def dump_urls():
    dump_utf_json(scrape_urls(), URL_JSON_FNAME)
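
All seven examples lean on the same pair of JSON helpers, which this page does not show. A minimal sketch of the obvious implementation, with ensure_ascii=False so the Cyrillic fields stay readable in the dump:

import json

def load_utf_json(fname):
    with open(fname, encoding='utf-8') as f:
        return json.load(f)

def dump_utf_json(data, fname):
    with open(fname, 'w', encoding='utf-8') as f:
        json.dump(data, f, ensure_ascii=False, indent=2)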