Exemplo n.º 1
0
def wf13_match_contribution_to_PDF_file():
    """
    Run `_match_to_file` for every contribution of every MdL of a legislature
    and save the resulting legislature object.

    The legislature is asked for via `ask_for_wahlperiode()`. The dilled
    legislature object is loaded, `_match_to_file` is called once per
    (MdL, protocol number, contribution) combination, and the object is then
    dumped with dill.

    Saves: ./parli_data/wf13_contributions/WP_<legislature>.dill
    Returns: nothing
    """
    legislature = ask_for_wahlperiode()

    # Purely informational: echo the wf11 session directory if it exists.
    sessions_dir = f'./parli_data/wf11_sessions/WP{legislature}/'
    if os.path.isdir(sessions_dir):
        print(sessions_dir)

    # NOTE(review): the helper is called with the legislature here; confirm
    # that the `_open_dilled_wp` in scope accepts that argument.
    wp = _open_dilled_wp(legislature)

    print(f'{wp.number_of_MdLs} parliamentarians in legislature {wp.wahlperiode}.')

    for mdl in wp.MdLs.values():
        # Iterate over a deep copy of the contributions - presumably because
        # `_match_to_file` modifies mdl.contributions; verify against helper.
        contributions = copy.deepcopy(mdl.contributions)
        for protocol_nr, contribution in contributions.items():
            _match_to_file(wp, mdl, protocol_nr, contribution)

    out_dir = './parli_data/wf13_contributions/'
    os.makedirs(out_dir, exist_ok=True)

    file_loc = out_dir + 'WP_{}.dill'.format(legislature)
    with open(file_loc, 'wb') as fout:
        dill.dump(wp, fout)
def wf11_download_session_pdfs():
    '''
    Download the session PDFs of one legislature that are not on disk yet.

    The legislature is asked for via `ask_for_wahlperiode()` and the session
    URLs come from `_create_session_urls()`. For every URL the part after
    "?Id=" plus ".pdf" is used as file name inside
    ./parli_data/wf11_sessions/WP<legislature>/.

    Prints "." for each file that already exists and "*" for each file that
    is downloaded, followed by a final newline.

    Returns: None
    '''
    wahlperiode = ask_for_wahlperiode()
    dir_loc = './parli_data/' + 'wf11_sessions/WP{}/'.format(wahlperiode)
    # exist_ok avoids the separate isdir() check (and the race between the
    # check and the creation).
    os.makedirs(dir_loc, exist_ok=True)

    url_base = 'https://www.landtag.nrw.de/portal/WWW/dokumentenarchiv/Dokument?Id='
    for url in _create_session_urls(wahlperiode):
        file_loc = dir_loc + url.split('?Id=')[-1] + '.pdf'
        if os.path.isfile(file_loc):
            # Already downloaded in an earlier run.
            print('.', end='')
            continue

        # Rebuild the download address from the document id and the archive
        # base URL instead of using the session URL directly.
        download_url = url_base + url.split('Id=')[-1]
        # NOTE(review): the response body is written unchecked - confirm that
        # `_get_response` raises (or retries) on HTTP errors.
        response = _get_response(download_url)
        print('*', end='')
        with open(file_loc, 'wb') as fout:
            fout.write(response.content)
    print()
    return None
Exemplo n.º 3
0
def _open_dilled_wp():
    '''
    Load the dilled legislature object selected by the user.

    Asks for the legislature via `ask_for_wahlperiode()` and loads
    ./parli_data/wf09_dilled_wps/WP_<legislature>.dill with dill.

    Returns: the object stored in that file
    '''
    chosen = ask_for_wahlperiode()
    dill_path = './parli_data/wf09_dilled_wps/' + 'WP_{}.dill'.format(chosen)

    with open(dill_path, 'rb') as dill_file:
        return dill.load(dill_file)
Exemplo n.º 4
0
def wf02_extract_wiki(legislature=None):
    """
    Get the soup object of a legislature and collect its MdLs from it.

    legislature: the legislature to process; if it is falsy (e.g. None), the
                 user is asked via `ask_for_wahlperiode()`.

    Returns: the result of `_collect_mdls()` for that soup object
    """
    # Fall back to the interactive prompt only when no value was passed in.
    legislature = legislature or ask_for_wahlperiode()

    return _collect_mdls(_get_bsObj(legislature), legislature)
def wf03_mk_top_container_wahlperiode(wahlperiode=None):
    """
    Build a Wahlperiode container holding all MdLs of one legislature.

    The MdLs come from `wf02_extract_all_infos_about_MdLs()`. Each one is
    stored in wp.MdLs under a key of the form
        lastname_firstname_electoralward_legislature
    which has to be identical to the key the MdL object already carries.
    If a key shows up more than once, the entries are merged with
    `_append_to_dict_entry()`.

    wahlperiode: the legislature; if falsy (e.g. None) it is asked for.

    Raises Exception if a recomputed key differs from the stored one or if
    the final count check fails.
    Returns wp
    """
    wahlperiode = wahlperiode or ask_for_wahlperiode()
    wp = Wahlperiode(int(wahlperiode))

    mdls = wf02_extract_all_infos_about_MdLs(wahlperiode)
    expected = len(mdls)
    stored = 0
    for candidate in mdls.values():
        key = f'{candidate.last_name}_{candidate.first_name}_' \
              f'{candidate.electoral_ward}_{candidate.legislature}'
        # Sanity check: the key built here must match the one created earlier.
        if candidate.key != key:
            raise Exception('wf03 needs attention')

        if key in wp.MdLs:
            # Same person seen again: merge into the existing entry and
            # lower the number of distinct MdLs that is expected.
            wp.MdLs[key] = _append_to_dict_entry(key, wp.MdLs[key], candidate)
            expected -= 1
            print('total', expected)
            continue

        wp.MdLs[key] = candidate
        stored += 1

    print(f'{stored} of {expected} MdLs')
    # NOTE(review): every loop pass either raises `stored` or lowers
    # `expected`, so as written this check looks like it can never fail.
    if stored != expected:
        raise Exception(
            "The number of MdLs and the number of names are not equal.")

    wp.number_of_MdLs = stored
    # De-duplicate the names and sort them with the project's own sorter.
    wp.names = _bubblesort(list(set(wp.names)))
    print('Concludes wf03 by returning wp.names')

    return wp
def _open_dilled_wp():
    '''
    Load the dilled legislature object for a legislature chosen by the user.

    The legislature is asked for via `ask_for_wahlperiode()`. Candidate
    files are tried in this order:
        1. the last entry (sorted by name) of
           /home/sam/projects/vEnvs/parli_NRW/parli_NRW/data/WP<legislature>/
        2. ./parli_data/wf15_dilled_wps/WP_<legislature>.dill
        3. ./parli_data/wf13_contributions/WP_<legislature>.dill

    Returns: the object loaded with dill
    Raises: FileNotFoundError if the last candidate does not exist either
    '''
    legislature = ask_for_wahlperiode()
    dir_ = f'/home/sam/projects/vEnvs/parli_NRW/parli_NRW/data/WP{legislature}/'

    # What is currently being looked for. Starts as the directory so that the
    # "did not find" message is valid even if the directory itself is missing
    # (referring to an unassigned file name there used to crash the fallback).
    missing = dir_
    try:
        entries = os.listdir(dir_)
        print(entries)
        latest_file = dir_ + sorted(entries)[-1]
        missing = latest_file
        print(f'opening: {latest_file}')
        with open(latest_file, 'rb') as fin:
            return dill.load(fin)
    except (FileNotFoundError, IsADirectoryError):
        print(f'did not find {missing}')
    except IndexError:
        # sorted(entries)[-1] fails like this when the directory is empty.
        print(f'did not find a file in {dir_}')

    # Fallbacks inside the working directory, newest workflow step first.
    file_local = './parli_data/wf15_dilled_wps/' + 'WP_{}.dill'.format(legislature)
    print(f'opening file: {file_local}')
    try:
        with open(file_local, 'rb') as fin:
            return dill.load(fin)
    except FileNotFoundError:
        file_loc = './parli_data/wf13_contributions/' + 'WP_{}.dill'.format(legislature)
        print(f'opening file: {file_loc}')
        with open(file_loc, 'rb') as fin:
            return dill.load(fin)
def wf01_save_wiki_bsObj():
    """
    Download the Wikipedia list of all parliamentarians (MdLs) of one
    "Wahlperiode" (legislature) and save it as a soup file.

    Example of the source page (legislature 17):

    https://de.wikipedia.org/wiki/Liste_der_Mitglieder_des_Landtages_Nordrhein-Westfalen_(17._Wahlperiode)

    The legislature is asked for via `ask_for_wahlperiode()` and inserted
    into the URL (the original notes list Wahlperiode 10 to 17 as
    available). If the target file already exists, the user has to confirm
    overwriting it with "y"; any other answer leaves the file untouched.

    Saves: ./parli_data/wf01_soup_objects/wikiListe_WP<legislature>.soup
    Returns: True, if `_download_and_save_bsObj()` reported success,
             otherwise False (also when overwriting was declined)
    """

    url_with_all_MdLs = 'https://de.wikipedia.org/wiki/Liste_der_Mitglieder_des_Landtages_Nordrhein-Westfalen_({}._Wahlperiode)'

    wahlperiode = ask_for_wahlperiode()
    url = url_with_all_MdLs.format(wahlperiode)
    print(url)

    dir_loc = "./parli_data/wf01_soup_objects/"
    file_loc = dir_loc + "wikiListe_WP{}.soup".format(wahlperiode)

    # makedirs also creates a missing ./parli_data/ parent directory, which a
    # plain os.mkdir would fail on during the very first run.
    os.makedirs(dir_loc, exist_ok=True)

    if os.path.exists(file_loc):
        check = input("File exists, overwrite? y/n")
        if check != "y":
            return False

    return bool(_download_and_save_bsObj(url, file_loc, wahlperiode))
Exemplo n.º 8
0
def wf01_save_bsObj():
    """
    Download the Landtag NRW list of all parliamentarians (MdLs) of one
    "Wahlperiode" and save it as a soup file.

    Source page:

    "https://www.landtag.nrw.de/portal/WWW/Webmaster/GB_II/II.2/Suche/Landtagsdokumentation_ALWP/Initiativen_Reden_von_Abgeordneten.jsp"

    The Wahlperiode is asked for via `ask_for_wahlperiode()` and inserted
    into the query string of the URL (the original notes list Wahlperiode 10
    to 17 as available). If the target file already exists, the user has to
    confirm overwriting it with "y"; any other answer leaves the file
    untouched.

    Saves: ./parli_data/wf01_soup_objects/namenListe_WP<Wahlperiode>.soup
    Returns: True, if `_download_and_save_bsObj()` reported success,
             otherwise False (also when overwriting was declined)
    """

    url_with_all_MdLs = "https://www.landtag.nrw.de/portal/WWW/Webmaster/GB_II/II.2/Suche/Landtagsdokumentation_ALWP/Initiativen_Reden_von_Abgeordneten.jsp?beg=ges&umfang=redner&wp={}"

    wahlperiode = ask_for_wahlperiode()
    url = url_with_all_MdLs.format(wahlperiode)
    print(url)

    dir_loc = "./parli_data/wf01_soup_objects/"
    file_loc = dir_loc + "namenListe_WP{}.soup".format(wahlperiode)

    # makedirs also creates a missing ./parli_data/ parent directory, which a
    # plain os.mkdir would fail on during the very first run.
    os.makedirs(dir_loc, exist_ok=True)

    if os.path.exists(file_loc):
        check = input("File exists, overwrite? y/n")
        if check != "y":
            return False

    return bool(_download_and_save_bsObj(url, file_loc, wahlperiode))
Exemplo n.º 9
0
def wf02_extract_all_infos_about_MdLs(legislature=None, verbose=False):
    """
    Collect all available infos about the MdLs of one legislature.

    A Wahlperiode object and the soup object of the legislature are created
    and handed to `_collect_mdls()`, which does the actual extraction.

    According to the original notes the collected informations are:
    last name, first name, electoral ward, party, office, peer title,
    academic title, parliament president and legislature, with "fn" / "ew"
    as placeholders for a missing first name / electoral ward.
    NOTE(review): those details live in `_collect_mdls` - confirm there.

    legislature: the legislature; if falsy (e.g. None) it is asked for via
                 `ask_for_wahlperiode()`.
    verbose:     passed through to `_collect_mdls()` unchanged.

    Returns dict "mdls"
    """
    leg = legislature or ask_for_wahlperiode()  # wahlperiode: legislature

    wp = Wahlperiode(int(leg))
    mdls = _collect_mdls(_get_bsObj(leg), wp, leg, verbose)

    closing_note = 'Concludes wf02 with returning mdls (a dictionary with all MdLs of a legislature).'
    print(closing_note)

    return mdls
Exemplo n.º 10
0
                    return first_name
            else:
                try:
                    if party.upper() == mdl_wiki[-1].upper():
                        first_name = mdl_wiki[0].split(',')[-1]
                        first_name = first_name.upper().strip()
                        return first_name
                except TypeError:
                    print('party', party)
                    print('mdl_wiki', mdl_wiki)
                    raise Exception
                except AttributeError:
                    print('last_name, first_name', last_name, first_name)
                    print(line)
                    print('party', party)
                    print('mdl_wiki', mdl_wiki)
                    raise Exception

    return first_name


if __name__ == "__main__":
    # Script entry point: ask for a legislature, extract its MdLs without
    # verbose output and print every one of them.
    wahlperiode = ask_for_wahlperiode()
    verbose = False
    mdls = wf02_extract_all_infos_about_MdLs(wahlperiode, verbose)
    for mdl in mdls.values():
        printout(mdl)
Exemplo n.º 11
0
 def chose_term(self):
     """Ask for a legislature and store the answer in self.legislature."""
     chosen = ask_for_wahlperiode()
     self.legislature = chosen