Exemplo n.º 1
0
def sync_data(conn, filename, batch_time):
    """Print SQL statements that sync the database with a scraped group file.

    Loads the JSON document at ``filename``, calls ``handle_removed_groups``
    and then walks every group in ``content["data"]``. For each group the
    names and the homepage/secretariat data are delegated to ``handle_names``
    and ``handle_homepage_and_sekretariat``. For each entry of the group's
    ``praesidium`` an INSERT statement is printed when the database has no
    matching Interessenbindung yet.

    Args:
        conn: Database connection passed through to the ``db`` helpers.
        filename: Path of the JSON file to read; also part of the printed
            backup file name.
        batch_time: Timestamp of this run (``year``/``month``/``day`` are
            read); forwarded to the SQL statement generators.

    Returns:
        The ``Summary`` object filled in while processing.

    Exits the process with status 1 if a listed member cannot be resolved to
    a parlamentarier id.
    """
    backup_filename = "{}-{:02d}-{:02d}-{}".format(batch_time.year,
                                                   batch_time.month,
                                                   batch_time.day, filename)
    print("\n\n-- ----------------------------- ")
    print("-- File: {}".format(backup_filename))

    summary = Summary()

    for parlamentarier_id, nachname, vorname in db.get_active_parlamentarier(
            conn):
        summary_row = summary.get_row(parlamentarier_id)
        summary_row.parlamentarier_name = nachname + ", " + vorname

    # Only parsing needs the file handle; release it before the long sync.
    with open(filename) as data_file:
        content = json.load(data_file)

    metadata = content["metadata"]
    pdf_creation_date = metadata["pdf_creation_date"]
    archive_pdf_name = metadata["archive_pdf_name"]
    stichdatum = datetime.strptime(pdf_creation_date, "%Y-%m-%d %H:%M:%S")
    print("-- PDF creation date: {}".format(stichdatum))
    print("-- PDF archive file: {}".format(archive_pdf_name))
    print("-- ----------------------------- ")

    handle_removed_groups(content, conn, summary, stichdatum, batch_time)

    for group in content["data"]:
        name_de = normalize_organisation(group["name_de"])
        name_fr = normalize_organisation(group["name_fr"])
        name_it = normalize_organisation(group["name_it"])
        members = group["praesidium"]

        organisation_id = db.get_organisation_id(conn, name_de, name_fr,
                                                 name_it)

        if organisation_id:
            handle_names(group, name_de, name_fr, name_it, organisation_id,
                         summary, conn, batch_time)

        handle_homepage_and_sekretariat(group, name_de, name_fr, name_it,
                                        organisation_id, summary, conn,
                                        batch_time)

        # Keep the database id (possibly None) untouched for the whole group
        # and use separate values for SQL output and summary. Overwriting
        # organisation_id inside the member loop made every member after the
        # first one of a new group look like it belonged to a known group.
        # NOTE(review): '@last_parlamentarische_gruppe' is presumably an SQL
        # variable set by the statement that creates the group - confirm.
        if organisation_id:
            sql_organisation_id = organisation_id
            summary_organisation_id = organisation_id
        else:
            sql_organisation_id = '@last_parlamentarische_gruppe'
            summary_organisation_id = "neu"

        for member in members:
            names = get_names(member)
            parlamentarier_id = db.get_parlamentarier_id_by_name(conn, names)

            if not parlamentarier_id:
                print(
                    "DATA INTEGRITY FAILURE: Parlamentarier {} not found in database."
                    .format(member))
                sys.exit(1)

            parlamentarier_dict = db.get_parlamentarier_dict(
                conn, parlamentarier_id)
            geschlecht = parlamentarier_dict["geschlecht"]
            # A description is only set when the praesidium has several
            # entries; its form depends on the stored gender.
            beschreibung = ''
            if len(members) > 1:
                if geschlecht == "M":
                    beschreibung = "Co-Präsident"
                elif geschlecht == "F":
                    beschreibung = "Co-Präsidentin"

            # An existing tie can only be looked up for a known organisation.
            interessenbindung_id = None
            if organisation_id:
                interessenbindung_id = db.get_interessenbindung_id(
                    conn, parlamentarier_id, organisation_id, stichdatum)

            summary_row = summary.get_row(parlamentarier_id)
            if interessenbindung_id:
                summary_row.gruppe_unveraendert(organisation_id, name_de)
                continue

            print("\n-- Neue Interessenbindung zwischen '{}' und '{}'".format(
                name_de, member))
            summary_row.neue_gruppe(summary_organisation_id, name_de)
            print(
                sql_statement_generator.
                insert_interessenbindung_parlamentarische_gruppe(
                    parlamentarier_id, sql_organisation_id, stichdatum,
                    beschreibung, batch_time))

    return summary
Exemplo n.º 2
0
def handle_removed_groups(group_type, content, conn, summary, stichdatum,
                          batch_time, pdf_date):
    """Print SQL for import-managed ties and groups missing from the file.

    Every row returned by ``db.get_pg_interessenbindungen_managed_by_import``
    is compared against ``content["data"]``. A row counts as present when a
    group in the file resolves (via ``get_organisation``) to the row's
    organisation id and lists a member that resolves to the row's
    parlamentarier id with the same ``art`` ("vorstand" if the member has a
    title, otherwise "mitglied"). For rows that are not present an
    ``end_interessenbindung`` statement is printed and
    ``summary.gruppe_beendet`` is called. Organisations for which no group in
    the file resolved to their id get an ``update_inaktiv_organisation``
    statement and ``summary.gruppe_geloescht``.

    Args:
        group_type: Forwarded to the ``db`` query and the SQL generator.
        content: Parsed import document; ``content["data"]`` is iterated and
            each group needs ``name_de``, ``praesidium`` and ``mitglieder``.
        conn: Database connection passed through to the lookups.
        summary: Object receiving ``gruppe_beendet`` / ``gruppe_geloescht``.
        stichdatum: Forwarded to ``end_interessenbindung``.
        batch_time: Forwarded to the SQL generators.
        pdf_date: Forwarded to the SQL generators.

    Exits the process with status 1 if a member of a matching group cannot be
    resolved to a parlamentarier id.
    """
    print('-- Check removed groups...')
    ib_managed_by_import = db.get_pg_interessenbindungen_managed_by_import(
        group_type, conn)
    if ib_managed_by_import:
        # Lookups are repeated for every row, so results are memoised.
        parlamentarier_id_cache = {}
        organisation_id_cache = {}
        lastprogress = -1
        # org_name -> {'id': organisation id, 'state': True once a group of
        # the file resolved to that id}
        org_states = {}
        total = len(ib_managed_by_import)
        for i, (ib_id, ib_art, ib_funktion_im_gremium, org_id, org_name,
                parl_vorname, parl_zweiter_vorname, parl_nachname,
                parl_id) in enumerate(ib_managed_by_import):
            progress = 100 * i // total
            if progress % 25 == 0 and progress != lastprogress:
                print('-- Progress {}%'.format(progress))
                lastprogress = progress
            present = False
            if org_name not in org_states:
                org_states[org_name] = {'id': org_id, 'state': False}
            for group in content["data"]:
                org_key = group["name_de"]
                if org_key not in organisation_id_cache:
                    organisation_id_cache[org_key] = get_organisation(
                        group, conn)
                (organisation_id, _inaktiv, name_de, _name_fr, _name_it,
                 _rechtsform) = organisation_id_cache[org_key]
                if org_id != organisation_id:
                    continue

                org_states[org_name]['state'] = True
                members = group["praesidium"] + group["mitglieder"]
                processed_parlamentarier_ids = set()
                for member, title in members:
                    parl_key = (member, title)
                    if parl_key not in parlamentarier_id_cache:
                        names = get_names(member)
                        parlamentarier_id_cache[parl_key] = (
                            db.get_parlamentarier_id_by_name(
                                conn, names, False))
                    parlamentarier_id, parlamentarier_bis = (
                        parlamentarier_id_cache[parl_key])

                    if not parlamentarier_id:
                        print(
                            "DATA INTEGRITY FAILURE: Parlamentarier '{}' of group '{}' not found in database."
                            .format(member, name_de))
                        sys.exit(1)
                    if parlamentarier_bis and parlamentarier_bis < date.today(
                    ):
                        # End date lies in the past: skipped without output.
                        continue
                    if parlamentarier_id in processed_parlamentarier_ids:
                        # Same person listed twice in this group: only the
                        # first occurrence is considered.
                        continue
                    processed_parlamentarier_ids.add(parlamentarier_id)

                    art = "vorstand" if title else "mitglied"
                    # Do not check funktion_im_gremium
                    if parl_id == parlamentarier_id and ib_art == art:
                        present = True
                        break

            if not present:
                full_name = parl_vorname
                if parl_zweiter_vorname:
                    full_name += " " + parl_zweiter_vorname
                full_name += " " + parl_nachname
                funktion_suffix = ('/' + ib_funktion_im_gremium
                                   if ib_funktion_im_gremium else '')
                print(
                    "\n-- Interessenbindung zwischen Parlamentarier '{}' und Gruppe '{}' als {}{} nicht mehr vorhanden"
                    .format(full_name, org_name, ib_art, funktion_suffix))
                print(
                    sql_statement_generator.end_interessenbindung(
                        ib_id, stichdatum, batch_time, pdf_date))

                summary.gruppe_beendet(parl_id, ib_id, org_name, ib_art)

        # Distinct loop names: the original rebound the dict's own name as
        # the loop variable while iterating over it.
        for gone_org_name, gone_org_state in org_states.items():
            if not gone_org_state['state']:
                inaktive_organisation_id = gone_org_state['id']
                print("\n-- Setze Gruppe '{}' ({}) inaktiv".format(
                    gone_org_name, inaktive_organisation_id))
                print(
                    sql_statement_generator.update_inaktiv_organisation(
                        group_type, inaktive_organisation_id, True, batch_time,
                        pdf_date))
                summary.gruppe_geloescht(inaktive_organisation_id,
                                         gone_org_name)

    print('-- Progress {}%'.format(100))
Exemplo n.º 3
0
def sync_data(conn, filename, batch_time):
    """Compare a scraped group file with the database and print the SQL delta.

    The JSON document at ``filename`` is parsed, ``handle_removed_groups`` is
    called, and then each group in ``content["data"]`` is processed:
    ``handle_names`` runs for groups found in the database,
    ``handle_homepage_and_sekretariat`` runs for every group, and for each
    ``praesidium`` entry without an Interessenbindung in the database an
    INSERT statement is printed.

    Args:
        conn: Database connection handed to the ``db`` helpers.
        filename: Path of the JSON file; also used in the printed backup
            file name.
        batch_time: Timestamp of the run; its ``year``/``month``/``day`` are
            printed and it is forwarded to the SQL generators.

    Returns:
        The ``Summary`` populated during the run.

    Exits the process with status 1 when a member cannot be found in the
    database.
    """
    backup_filename = "{}-{:02d}-{:02d}-{}".format(
        batch_time.year, batch_time.month, batch_time.day, filename)
    print("\n\n-- ----------------------------- ")
    print("-- File: {}".format(backup_filename))

    summary = Summary()

    for parlamentarier_id, nachname, vorname in db.get_active_parlamentarier(
            conn):
        summary_row = summary.get_row(parlamentarier_id)
        summary_row.parlamentarier_name = nachname + ", " + vorname

    with open(filename) as data_file:
        content = json.load(data_file)
        stichdatum = datetime.strptime(
            content["metadata"]["pdf_creation_date"], "%Y-%m-%d %H:%M:%S")
        print("-- PDF creation date: {}".format(stichdatum))
        print("-- PDF archive file: {}".format(
            content["metadata"]["archive_pdf_name"]))
        print("-- ----------------------------- ")

        handle_removed_groups(content, conn, summary, stichdatum, batch_time)

        for group in content["data"]:
            name_de = normalize_organisation(group["name_de"])
            name_fr = normalize_organisation(group["name_fr"])
            name_it = normalize_organisation(group["name_it"])
            members = group["praesidium"]

            organisation_id = db.get_organisation_id(
                conn, name_de, name_fr, name_it)
            is_known_group = bool(organisation_id)

            if is_known_group:
                handle_names(group, name_de, name_fr, name_it,
                             organisation_id, summary, conn, batch_time)

            handle_homepage_and_sekretariat(
                group, name_de, name_fr, name_it, organisation_id, summary,
                conn, batch_time)

            # organisation_id is deliberately never reassigned below. The
            # placeholder used in the generated SQL lives in its own variable
            # so that the second and later members of an unknown group are
            # still treated as members of a new group.
            # NOTE(review): the placeholder looks like an SQL variable filled
            # by the group's own INSERT - confirm in sql_statement_generator.
            sql_organisation_id = (
                organisation_id
                if is_known_group else '@last_parlamentarische_gruppe')

            for member in members:
                names = get_names(member)
                parlamentarier_id = db.get_parlamentarier_id_by_name(
                    conn, names)

                if not parlamentarier_id:
                    print(
                        "DATA INTEGRITY FAILURE: Parlamentarier {} not found in database."
                        .format(member))
                    sys.exit(1)

                parlamentarier_dict = db.get_parlamentarier_dict(
                    conn, parlamentarier_id)
                geschlecht = parlamentarier_dict["geschlecht"]
                # Only a praesidium with several entries gets a description.
                beschreibung = ''
                if len(members) > 1:
                    if geschlecht == "M":
                        beschreibung = "Co-Präsident"
                    elif geschlecht == "F":
                        beschreibung = "Co-Präsidentin"

                # Unknown groups cannot have an existing tie in the database.
                interessenbindung_id = None
                if is_known_group:
                    interessenbindung_id = db.get_interessenbindung_id(
                        conn, parlamentarier_id, organisation_id, stichdatum)

                summary_row = summary.get_row(parlamentarier_id)
                if not interessenbindung_id:
                    print(
                        "\n-- Neue Interessenbindung zwischen '{}' und '{}'"
                        .format(name_de, member))
                    if is_known_group:
                        summary_row.neue_gruppe(organisation_id, name_de)
                    else:
                        summary_row.neue_gruppe("neu", name_de)

                    print(
                        sql_statement_generator.
                        insert_interessenbindung_parlamentarische_gruppe(
                            parlamentarier_id, sql_organisation_id,
                            stichdatum, beschreibung, batch_time))
                else:
                    summary_row.gruppe_unveraendert(organisation_id, name_de)

    return summary
Exemplo n.º 4
0
def sync_data(group_type, conn, filename, batch_time):
    """Print the SQL needed to bring the database in line with a group file.

    Parses the JSON document at ``filename``, calls ``handle_removed_groups``
    and then processes each group of ``content["data"]`` once (a group whose
    organisation id was already handled is skipped with a warning). Per group,
    ``handle_names`` runs when the organisation exists in the database and
    ``handle_organisation`` runs in every case. Per member (``praesidium``
    plus ``mitglieder``, each a ``(member, title)`` pair) one of four things
    happens: a new tie is inserted, a tie whose ``art`` changed is ended and
    re-inserted, a tie whose function/description changed is updated, or the
    tie is recorded as unchanged.

    Args:
        group_type: Forwarded to ``handle_removed_groups`` and
            ``handle_organisation``.
        conn: Database connection handed to the ``db`` helpers.
        filename: Path of the JSON file; also part of the printed backup
            file name.
        batch_time: Timestamp of the run (``year``/``month``/``day`` are
            read); forwarded to the SQL generators.

    Returns:
        The ``Summary`` populated during the run.

    Exits the process with status 1 when a member cannot be resolved to a
    parlamentarier id.
    """
    backup_filename = "{}-{:02d}-{:02d}-{}".format(batch_time.year,
                                                   batch_time.month,
                                                   batch_time.day, filename)
    print("\n\n-- ----------------------------- ")
    print("-- File: {}".format(backup_filename))

    summary = Summary()

    for parlamentarier_id, nachname, vorname in db.get_active_parlamentarier(
            conn):
        summary.set_parlamentarier_name(parlamentarier_id,
                                        nachname + ", " + vorname)

    # Only parsing needs the file handle; release it before the long sync.
    with open(filename) as data_file:
        content = json.load(data_file)

    metadata = content["metadata"]
    pdf_date_str = metadata["pdf_creation_date"]
    archive_pdf_name = metadata["archive_pdf_name"]
    url = metadata["url"]
    pdf_date = datetime.strptime(
        pdf_date_str, "%Y-%m-%d %H:%M:%S")  # 2019-07-12 14:55:08
    stichdatum = pdf_date
    print("-- PDF creation date: {}".format(pdf_date))
    print("-- PDF archive file: {}".format(archive_pdf_name))
    print("-- URL: {}".format(url))
    print("-- ----------------------------- ")

    handle_removed_groups(group_type, content, conn, summary, stichdatum,
                          batch_time, pdf_date)

    print('\n-- Sync pgs...')

    handled_organisation_ids = set()
    for group in content["data"]:
        members = group["praesidium"] + group["mitglieder"]

        (organisation_id, inaktiv, name_de, name_fr, name_it,
         rechtsform) = get_organisation(group, conn)

        if organisation_id:
            # Skip duplicate groups: Aktive Mobilität and Langsamverkehr are
            # twice in 23.11.2020 PDF
            if organisation_id in handled_organisation_ids:
                print('-- WARN: Organisation "{}" ID={} twice in PDF. Skipped'.
                      format(name_de, organisation_id))
                continue
            handled_organisation_ids.add(organisation_id)
            handle_names(group, name_de, name_fr, name_it, organisation_id,
                         summary, conn, batch_time, pdf_date)
        else:
            print('-- INFO: Organisation "{}" not found in DB'.format(
                name_de))

        handle_organisation(group_type, rechtsform, group, inaktiv, name_de,
                            name_fr, name_it, organisation_id, summary, conn,
                            batch_time, pdf_date)

        # organisation_id stays untouched for the whole group. The original
        # replaced it with the SQL placeholder inside the member loop, so
        # every member after the first one of a new group was looked up and
        # summarised under the placeholder string instead of "neu".
        # NOTE(review): '@last_parlamentarische_gruppe' is presumably an SQL
        # variable set when the group itself is inserted - confirm.
        if organisation_id:
            sql_organisation_id = organisation_id
            summary_organisation_id = organisation_id
        else:
            sql_organisation_id = '@last_parlamentarische_gruppe'
            summary_organisation_id = "neu"

        processed_parlamentarier_ids = set()
        for member, title in members:
            names = get_names(member)
            parlamentarier_id, parlamentarier_bis = (
                db.get_parlamentarier_id_by_name(conn, names, False))

            if not parlamentarier_id:
                print(
                    "DATA INTEGRITY FAILURE: Parlamentarier '{}' of group '{}' not found in database."
                    .format(member, name_de))
                sys.exit(1)
            if parlamentarier_bis and parlamentarier_bis < date.today():
                print(
                    "-- INFO: Parlamentarier '{}' ({}) ist nicht mehr aktiv ('{}')"
                    .format(member, parlamentarier_id, parlamentarier_bis))
                continue
            if parlamentarier_id in processed_parlamentarier_ids:
                print('-- WARN: Ignore duplicate member "{}" ({}) in PG "{}"'.
                      format(member, parlamentarier_id, name_de))
                continue
            processed_parlamentarier_ids.add(parlamentarier_id)

            art = "vorstand" if title else "mitglied"

            db_parlamentarier = db.get_parlamentarier_dict(
                conn, parlamentarier_id)
            # Index into the per-gender entries of literals.president_mapping:
            # 0 for 'M', 1 for anything else.
            geschlecht = 0 if db_parlamentarier["geschlecht"] == 'M' else 1
            if title:
                title_mapping = literals.president_mapping[title]
                funktion_im_gremium = title_mapping[0]
                beschreibung = title_mapping[1][geschlecht]
                beschreibung_fr = title_mapping[2][geschlecht]
            else:
                funktion_im_gremium = None
                beschreibung = "Mitglied"
                beschreibung_fr = "Membre"

            # Without a known organisation there is nothing to look up.
            interessenbindung_id = None
            db_art = db_funktion_im_gremium = None
            db_beschreibung = db_beschreibung_fr = None
            if organisation_id:
                (interessenbindung_id, db_art, db_funktion_im_gremium,
                 db_beschreibung,
                 db_beschreibung_fr) = db.get_interessenbindung_id(
                     conn, parlamentarier_id, organisation_id, stichdatum)

            if not interessenbindung_id:
                print(
                    "\n-- Neue Interessenbindung zwischen '{}' und '{}' als {}{}"
                    .format(
                        name_de, member, art, '/' + funktion_im_gremium
                        if funktion_im_gremium else ''))
                summary.neue_gruppe(parlamentarier_id,
                                    summary_organisation_id, name_de, art)
                print(
                    sql_statement_generator.
                    insert_interessenbindung_parlamentarische_gruppe(
                        parlamentarier_id, sql_organisation_id, stichdatum,
                        title is not None, beschreibung, beschreibung_fr,
                        funktion_im_gremium, url, batch_time, pdf_date))
            elif art != db_art:
                # Do not check funktion_im_gremium for change here; a changed
                # art ends the old tie and inserts a new one.
                print(
                    "\n-- Interessenbindungsart oder Funktion geändert zwischen '{}' und '{}': '{}', '{}'"
                    .format(name_de, member, art, funktion_im_gremium))
                print(
                    sql_statement_generator.end_interessenbindung(
                        interessenbindung_id, stichdatum, batch_time,
                        pdf_date))
                print(
                    sql_statement_generator.
                    insert_interessenbindung_parlamentarische_gruppe(
                        parlamentarier_id, organisation_id, stichdatum,
                        title is not None, beschreibung, beschreibung_fr,
                        funktion_im_gremium, url, batch_time, pdf_date))
                summary.gruppe_veraendert(parlamentarier_id, organisation_id,
                                          name_de, art)
            elif (funktion_im_gremium != db_funktion_im_gremium
                  or beschreibung != db_beschreibung
                  or beschreibung_fr != db_beschreibung_fr):
                print(
                    "\n-- Interessenbindungsbeschreibung geändert '{}': '{}' → '{}' / '{}' → '{}' / '{}' → '{}'"
                    .format(name_de, db_funktion_im_gremium,
                            funktion_im_gremium, db_beschreibung,
                            beschreibung, db_beschreibung_fr,
                            beschreibung_fr))
                print(
                    sql_statement_generator.
                    update_beschreibung_interessenbindung(
                        interessenbindung_id, funktion_im_gremium,
                        beschreibung, beschreibung_fr, url, batch_time,
                        pdf_date))
                summary.gruppe_veraendert(parlamentarier_id, organisation_id,
                                          name_de, art)
            else:
                summary.gruppe_unveraendert(parlamentarier_id,
                                            organisation_id, name_de, art)

    return summary