Exemplo n.º 1
0
    def entry_generator(self):
        """Yield edge dicts pairing ``nodes_vybor`` ids with ``parsed_zakon`` entries.

        Entries without ``const.ZAKON_ROZHODNUTIE_VYBORY``, or with that field
        equal to the empty string, are skipped.  For the remaining entries every
        id read from ``nodes_vybor`` is tested for membership in that field and
        in ``entry[const.ZAKON_ROZHODNUTIE_GESTORSKY]``; a hit in either one
        produces a result.  When both match, the type is
        ``const.NAVRHNUTY_GESTORSKY``.

        Yields:
            dict: keys ``const.NEO4J_BEGINNING_ID`` (the vybor id),
            ``const.NEO4J_ENDING_ID`` (the entry's ``const.MONGO_ID``),
            ``const.NAVRHNUTY_LEHOTA`` (value of ``self.get_lehota``) and
            ``const.NAVRHNUTY_TYP``.
        """
        vybory = [
            node[const.MONGO_ID] for node in storage.MongoCollection(
                self.db, 'nodes_vybor').iterate_all()
        ]
        source_collection = storage.MongoCollection(self.db, 'parsed_zakon')

        for entry in source_collection.iterate_all():
            if const.ZAKON_ROZHODNUTIE_VYBORY not in entry:
                continue
            sprava = entry[const.ZAKON_ROZHODNUTIE_VYBORY]
            if sprava == '':
                # Skip only this entry.  The earlier `break` ended the whole
                # generator, silently dropping every entry that followed.
                continue
            lehota = self.get_lehota(sprava)
            for vybor in vybory:
                in_sprava = vybor in sprava
                in_gestorsky = vybor in entry[const.ZAKON_ROZHODNUTIE_GESTORSKY]
                # The gestorsky match takes precedence when both apply.
                if in_gestorsky:
                    typ = const.NAVRHNUTY_GESTORSKY
                elif in_sprava:
                    typ = const.NAVRHNUTY_DOPLNUJUCI
                else:
                    continue
                yield {
                    const.NEO4J_BEGINNING_ID: vybor,
                    const.NEO4J_ENDING_ID: entry[const.MONGO_ID],
                    const.NAVRHNUTY_LEHOTA: lehota,
                    const.NAVRHNUTY_TYP: typ,
                }
Exemplo n.º 2
0
 def entry_generator(self):
     """Yield edges from second-reading votes to the amendments they match.

     For each ``parsed_zakon`` entry the document with the same
     ``const.MONGO_ID`` is read from ``parsed_hlasovanietlace``.  Votes whose
     title contains 'druhé čítanie' are kept, reduced to the text after the
     last 'Hlasovanie'.  Each amendment id (sorted) is matched against those
     texts by the submitter's name with its final character removed; when the
     same name submitted several amendments, the text must also contain
     '<k>. návrh' where k is the 1-based occurrence of that name.

     Yields:
         dict: ``const.NEO4J_BEGINNING_ID`` is the vote id as int,
         ``const.NEO4J_ENDING_ID`` the amendment id as int.
     """
     zakon_collection = storage.MongoCollection(self.db, 'parsed_zakon')
     tlac_collection = storage.MongoCollection(self.db,
                                               'parsed_hlasovanietlace')
     for entry in zakon_collection.iterate_all():
         tlac = tlac_collection.get({const.MONGO_ID: entry[const.MONGO_ID]})
         hlasovania = tlac.get(const.HLASOVANIETLAC_LIST, {})
         zmeny = entry.get(const.ZAKON_ZMENY, {})
         zmena_ids = sorted(zmeny.keys())
         predkladatelia = []
         for zmena_id in zmena_ids:
             predkladatel = zmeny[zmena_id][const.ZAKON_ZMENY_PREDKLADATEL]
             predkladatelia.append(predkladatel.split(',')[0])

         second_reading = {}
         for hlas_id, hlasovanie in hlasovania.items():
             if 'druhé čítanie' in hlasovanie[const.HLASOVANIE_NAZOV]:
                 nazov = hlasovanie[const.HLASOVANIE_NAZOV]
                 second_reading[hlas_id] = nazov.split('Hlasovanie')[-1]
         hlas_text = pd.Series(second_reading)
         if len(hlas_text) == 0:
             continue

         # ordinals[j] is the running occurrence number of a repeated name,
         # or 0 when the name occurs only once.
         totals = {}
         for name in predkladatelia:
             totals[name] = totals.get(name, 0) + 1
         seen = {}
         ordinals = []
         for name in predkladatelia:
             seen[name] = seen.get(name, 0) + 1
             ordinals.append(seen[name] if totals[name] > 1 else 0)

         for zmena_id, name, ordinal in zip(zmena_ids, predkladatelia,
                                            ordinals):
             matching = hlas_text[hlas_text.str.contains(name[:-1])]
             if ordinal > 0:
                 matching = matching[matching.str.contains(
                     f'{ordinal}. návrh')]
             for hlas_id, text in matching.items():
                 if 'dopracovanie' in text or 'preložiť' in text:
                     continue
                 yield {
                     const.NEO4J_BEGINNING_ID: int(hlas_id),
                     const.NEO4J_ENDING_ID: int(zmena_id)
                 }
Exemplo n.º 3
0
 def entry_generator(self):
     """Yield one membership edge per ``parsed_poslanec`` entry listed in the reference vote.

     The reference vote is the single ``parsed_hlasovanie`` document returned
     for ``sort=[(const.MONGO_ID, -1)]`` (presumably the one with the highest
     id; confirm against ``storage.MongoCollection.get``).  Only entries whose
     ``const.MONGO_ID`` appears among the keys of that vote's
     ``const.HLASOVANIE_INDIVIDUALNE`` are emitted.

     The first membership key found in ``const.KLUB_DICT`` decides the club
     and type; otherwise ``const.KLUB_NEZARADENI`` / ``const.CLEN_CLEN`` are
     used.

     Yields:
         dict: keys ``const.NEO4J_BEGINNING_ID`` (entry id as int),
         ``const.NEO4J_ENDING_ID`` (club) and ``const.CLEN_TYP``.
     """
     hlasovanie_collection = storage.MongoCollection(self.db,
                                                     'parsed_hlasovanie')
     last_entry = hlasovanie_collection.get(
         {},
         projection=[const.HLASOVANIE_INDIVIDUALNE],
         sort=[(const.MONGO_ID, -1)])
     # Built once as a set so the per-entry membership test below is O(1)
     # instead of a linear scan over a list.
     aktivni_ids = {
         int(i) for i in last_entry[const.HLASOVANIE_INDIVIDUALNE]
     }
     poslanec_collection = storage.MongoCollection(self.db, 'parsed_poslanec')
     for entry in poslanec_collection.iterate_all():
         if entry[const.MONGO_ID] not in aktivni_ids:
             continue
         klub = const.KLUB_NEZARADENI
         typ = const.CLEN_CLEN
         clenstvo = entry[const.POSLANEC_CLENSTVO]
         for org in clenstvo:
             if org in const.KLUB_DICT:
                 klub = const.KLUB_DICT[org]
                 typ = const.CLEN_TYP_DICT[clenstvo[org]]
                 break
         yield {
             const.NEO4J_BEGINNING_ID: int(entry[const.MONGO_ID]),
             const.NEO4J_ENDING_ID: klub,
             const.CLEN_TYP: typ
         }
Exemplo n.º 4
0
 def entry_generator(self):
     """Yield one edge per ``parsed_zakon`` entry from a spectrum side to the entry.

     Entries without ``const.ZAKON_NAVRHOVATEL`` are skipped.  If the proposer
     text contains ``const.NAVRHOL_VLADA`` (case-insensitive), the edge starts
     at ``const.SPEKTRUM_KOALICIA``.  If it contains
     ``const.NAVRHOL_POSLANCI``, the proposers are read from
     ``edges_poslanec_zakon_navrhol``, mapped through
     ``edges_poslanec_klub_bol_clenom`` and ``edges_klub_spektrum_clen``, and
     the edge starts at ``const.SPEKTRUM_KOALICIA`` only when it strictly
     outnumbers ``const.SPEKTRUM_OPOZICIA``; ties go to the latter.

     Yields:
         dict: keys ``const.NEO4J_BEGINNING_ID``, ``const.NEO4J_ENDING_ID``
         and ``const.NAVRHOL_NAVRHOVATEL``.
     """
     zakon_collection = storage.MongoCollection(self.db, 'parsed_zakon')
     col_navrh = storage.MongoCollection(
         self.db,
         'edges_poslanec_zakon_navrhol')  # TODO: fix collection naming
     col_klub = storage.MongoCollection(self.db,
                                        'edges_poslanec_klub_bol_clenom')
     col_spektrum = storage.MongoCollection(self.db,
                                            'edges_klub_spektrum_clen')
     for entry in zakon_collection.iterate_all():
         if const.ZAKON_NAVRHOVATEL not in entry:
             continue
         navrhovatel = entry[const.ZAKON_NAVRHOVATEL]
         zakon_id = entry[const.MONGO_ID]

         if const.NAVRHOL_VLADA.lower() in navrhovatel.lower():
             yield {
                 const.NEO4J_BEGINNING_ID: const.SPEKTRUM_KOALICIA,
                 const.NEO4J_ENDING_ID: zakon_id,
                 const.NAVRHOL_NAVRHOVATEL: navrhovatel
             }
             continue
         if const.NAVRHOL_POSLANCI.lower() not in navrhovatel.lower():
             continue

         poslanci = []
         for navrh in col_navrh.get_all(
                 {const.NEO4J_ENDING_ID: entry[const.MONGO_ID]}):
             poslanci.append(navrh[const.NEO4J_BEGINNING_ID])
         if not poslanci:
             continue

         kluby = []
         for poslanec_id in poslanci:
             clenstvo = col_klub.get({const.NEO4J_BEGINNING_ID: poslanec_id})
             kluby.append(clenstvo[const.NEO4J_ENDING_ID])
         spektrum = []
         for klub in kluby:
             strana = col_spektrum.get({const.NEO4J_BEGINNING_ID: klub})
             spektrum.append(strana[const.NEO4J_ENDING_ID])

         koalicia = spektrum.count(const.SPEKTRUM_KOALICIA)
         opozicia = spektrum.count(const.SPEKTRUM_OPOZICIA)
         result = {
             const.NEO4J_ENDING_ID: zakon_id,
             const.NAVRHOL_NAVRHOVATEL: const.NAVRHOL_POSLANCI
         }
         result[const.NEO4J_BEGINNING_ID] = (
             const.SPEKTRUM_KOALICIA
             if koalicia > opozicia else const.SPEKTRUM_OPOZICIA)
         yield result
Exemplo n.º 5
0
 def entry_generator(self):
     """Yield every ``parsed_hlasovanie`` entry with two fields removed.

     ``const.MONGO_TIMESTAMP`` and ``const.HLASOVANIE_INDIVIDUALNE`` are
     deleted from each entry in place; a missing field raises ``KeyError``.
     """
     dropped_fields = (const.MONGO_TIMESTAMP, const.HLASOVANIE_INDIVIDUALNE)
     hlasovania = storage.MongoCollection(self.db, 'parsed_hlasovanie')
     for entry in hlasovania.iterate_all():
         for field in dropped_fields:
             del entry[field]
         yield entry
Exemplo n.º 6
0
 def entry_generator(self):
     """Yield every ``parsed_zmena`` entry stripped of its people fields.

     ``const.ZMENA_PODPISANI`` and ``const.ZMENA_DALSI`` are removed if
     present.  ``const.ZMENA_PREDKLADATEL`` is removed unconditionally, so an
     entry lacking it raises ``KeyError``.
     """
     optional_fields = (const.ZMENA_PODPISANI, const.ZMENA_DALSI)
     zmeny = storage.MongoCollection(self.db, 'parsed_zmena')
     for entry in zmeny.iterate_all():
         for field in optional_fields:
             entry.pop(field, None)
         # No default here: this field is required on every entry.
         entry.pop(const.ZMENA_PREDKLADATEL)
         yield entry
 def create_id_generator(self):
     """Return all amendment ids found in ``parsed_zakon`` as a flat list of ints.

     Iterates every value returned by
     ``get_all_attribute(const.ZAKON_ZMENY)`` and converts each element it
     contains with ``int``.
     """
     zakony = storage.MongoCollection(self.db, 'parsed_zakon')
     ids = []
     for zmeny in zakony.get_all_attribute(const.ZAKON_ZMENY):
         for zmena_id in zmeny:
             ids.append(int(zmena_id))
     return ids
 def __init__(self, db, conf, base_url):
     """Store the handles and open the ``raw_<classname>`` collection.

     Args:
         db: database handle passed to ``storage.MongoCollection``.
         conf: configuration object, stored as ``self.conf``.
         base_url: stored as ``self.base_url``.
     """
     super().__init__()
     self.db = db
     self.conf = conf
     self.base_url = base_url
     # The collection name is derived from the lower-cased concrete class name.
     class_name = type(self).__name__.lower()
     self.collection = storage.MongoCollection(db, 'raw_' + class_name)
Exemplo n.º 9
0
 def entry_generator(self):
     """Yield an edge for each ``parsed_zakon`` entry that has ``const.ZAKON_GESTORSKY``.

     Yields:
         dict: ``const.NEO4J_BEGINNING_ID`` is the entry's
         ``const.ZAKON_GESTORSKY`` value, ``const.NEO4J_ENDING_ID`` its
         ``const.MONGO_ID``.
     """
     zakony = storage.MongoCollection(self.db, 'parsed_zakon')
     for entry in zakony.iterate_all():
         if const.ZAKON_GESTORSKY not in entry:
             continue
         yield {
             const.NEO4J_BEGINNING_ID: entry[const.ZAKON_GESTORSKY],
             const.NEO4J_ENDING_ID: entry[const.MONGO_ID]
         }
Exemplo n.º 10
0
 def entry_generator(self):
     """Yield an edge from every amendment id to the ``parsed_zakon`` entry listing it.

     Entries without ``const.ZAKON_ZMENY`` contribute nothing.

     Yields:
         dict: ``const.NEO4J_BEGINNING_ID`` is the amendment id as int,
         ``const.NEO4J_ENDING_ID`` the entry's ``const.MONGO_ID``.
     """
     zakony = storage.MongoCollection(self.db, 'parsed_zakon')
     for entry in zakony.iterate_all():
         zmeny = entry.get(const.ZAKON_ZMENY, {})
         for zmena_id in zmeny:
             edge = {
                 const.NEO4J_BEGINNING_ID: int(zmena_id),
                 const.NEO4J_ENDING_ID: entry[const.MONGO_ID]
             }
             yield edge
def get_poslanec_id(db, name):  # priezvisko, meno
    """Look up a ``nodes_poslanec`` document by name and return its ``'id'``.

    Args:
        db: database handle passed to ``storage.MongoCollection``.
        name: string of the form ``"priezvisko, meno"``; it must split on
            ``','`` into exactly two parts, otherwise ``ValueError`` is raised.

    Returns:
        The ``'id'`` value of the matching document.
    """
    nodes = storage.MongoCollection(db, 'nodes_poslanec')
    priezvisko, meno = (part.strip() for part in name.split(','))
    query = {
        const.POSLANEC_PRIEZVISKO: priezvisko,
        const.POSLANEC_MENO: meno
    }
    return nodes.get(query)['id']
Exemplo n.º 12
0
 def __init__(self, db, conf):
     """Set up the target collection and logger, both named after the concrete class.

     Args:
         db: database handle passed to ``storage.MongoCollection``.
         conf: configuration object, stored as ``self.conf``.
     """
     self.db = db
     self.conf = conf
     # Text between the quotes of the class repr, e.g. "package.module.Class".
     qualified_name = str(self.__class__).split("'")[1]
     self.name = qualified_name
     class_name = qualified_name.split(".")[-1]
     self.target_name = utils.camel2snake(class_name)
     self.target_collection = storage.MongoCollection(
         self.db, self.target_name)
     self.log = logging.getLogger(self.name)
     self.batch_process = True
Exemplo n.º 13
0
 def entry_generator(self):
     """Yield an edge from each ``parsed_legislativnainiciativa`` entry to every id it lists.

     Entries without ``const.PREDLOZILZAKON_LIST`` contribute nothing.

     Yields:
         dict: ``const.NEO4J_BEGINNING_ID`` is the entry's
         ``const.MONGO_ID``, ``const.NEO4J_ENDING_ID`` the listed id as int.
     """
     iniciativy = storage.MongoCollection(
         self.db, 'parsed_legislativnainiciativa')
     for entry in iniciativy.iterate_all():
         zakon_ids = entry.get(const.PREDLOZILZAKON_LIST, {})
         for zakon_id in zakon_ids:
             edge = {
                 const.NEO4J_BEGINNING_ID: entry[const.MONGO_ID],
                 const.NEO4J_ENDING_ID: int(zakon_id)
             }
             yield edge
Exemplo n.º 14
0
 def entry_generator(self):
     """Yield one node dict per distinct delegation named in ``parsed_poslanec``.

     A membership key counts as a delegation when it contains
     ``const.POSLANEC_DELEGACIA`` (case-insensitive).  All entries are
     scanned before the first node is yielded.

     Yields:
         dict: ``{const.MONGO_ID: <membership key>}``.
     """
     poslanci = storage.MongoCollection(self.db, 'parsed_poslanec')
     delegacie = set()
     for entry in poslanci.iterate_all():
         for org in entry[const.POSLANEC_CLENSTVO]:
             if const.POSLANEC_DELEGACIA.lower() not in org.lower():
                 continue
             delegacie.add(org)
     for delegacia in delegacie:
         yield {const.MONGO_ID: delegacia}
Exemplo n.º 15
0
 def entry_generator(self):
     """Yield an edge from every listed vote id to its ``parsed_hlasovanietlace`` entry.

     Entries without ``const.HLASOVANIETLAC_LIST`` contribute nothing.

     Yields:
         dict: ``const.NEO4J_BEGINNING_ID`` is the vote id as int,
         ``const.NEO4J_ENDING_ID`` the entry's ``const.MONGO_ID``.
     """
     tlace = storage.MongoCollection(self.db, 'parsed_hlasovanietlace')
     for entry in tlace.iterate_all():
         hlasovania = entry.get(const.HLASOVANIETLAC_LIST, {})
         for hlasovanie_id in hlasovania:
             edge = {
                 const.NEO4J_BEGINNING_ID: int(hlasovanie_id),
                 const.NEO4J_ENDING_ID: entry[const.MONGO_ID]
             }
             yield edge
Exemplo n.º 16
0
 def entry_generator(self):
     """Yield an edge for each speech in ``parsed_rozprava`` that carries ``const.ROZPRAVA_TLAC``.

     Yields:
         dict: ``const.NEO4J_BEGINNING_ID`` is the speech's
         ``const.MONGO_ID``, ``const.NEO4J_ENDING_ID`` its
         ``const.ROZPRAVA_TLAC`` value.
     """
     rozpravy = storage.MongoCollection(self.db, 'parsed_rozprava')
     for entry in rozpravy.iterate_all():
         for vystupenie in entry[const.ROZPRAVA_VYSTUPENIA]:
             if const.ROZPRAVA_TLAC not in vystupenie:
                 continue
             yield {
                 const.NEO4J_BEGINNING_ID: vystupenie[const.MONGO_ID],
                 const.NEO4J_ENDING_ID: vystupenie[const.ROZPRAVA_TLAC]
             }
Exemplo n.º 17
0
def get_collection(obj, conf, stage, db):
    """Return the ``storage.MongoCollection`` named ``<prefix>_<suffix>``.

    Args:
        obj: either the suffix itself (a ``str``) or any object, in which
            case the lower-cased last component of its class name is used.
        conf: nested mapping; the prefix is read from
            ``conf[const.CONF_MONGO][const.CONF_MONGO_DATABASE][const.CONF_MONGO_COLLECTION][stage]``.
        stage: key selecting the prefix.
        db: database handle passed to ``storage.MongoCollection``.
    """
    database_conf = conf[const.CONF_MONGO][const.CONF_MONGO_DATABASE]
    prefix = database_conf[const.CONF_MONGO_COLLECTION][stage]
    suffix = (obj if isinstance(obj, str) else
              str(obj.__class__).split("'")[1].split(".")[-1].lower())
    return storage.MongoCollection(db, "_".join((prefix, suffix)))
Exemplo n.º 18
0
 def entry_generator(self):
     """Yield an edge from each signatory to the ``parsed_zmena`` entry they signed.

     Names under ``const.ZMENA_PODPISANI`` (absent means none) are resolved
     with ``utils.get_poslanec_id``.

     Yields:
         dict: ``const.NEO4J_BEGINNING_ID`` is the resolved id,
         ``const.NEO4J_ENDING_ID`` the entry's ``const.MONGO_ID``.
     """
     zmeny = storage.MongoCollection(self.db, 'parsed_zmena')
     for entry in zmeny.iterate_all():
         podpisani = entry.get(const.ZMENA_PODPISANI, [])
         for poslanec in podpisani:
             edge = {
                 const.NEO4J_BEGINNING_ID:
                 utils.get_poslanec_id(self.db, poslanec),
                 const.NEO4J_ENDING_ID: entry[const.MONGO_ID]
             }
             yield edge
Exemplo n.º 19
0
 def entry_generator(self):
     """Yield an edge for every delegation membership in ``parsed_poslanec``.

     A membership key counts as a delegation when it contains
     ``const.POSLANEC_DELEGACIA`` (case-insensitive).

     Yields:
         dict: ``const.NEO4J_BEGINNING_ID`` is the entry's
         ``const.MONGO_ID``, ``const.NEO4J_ENDING_ID`` the membership key,
         ``const.CLEN_TYP`` the membership value mapped through
         ``const.CLEN_TYP_DICT``.
     """
     poslanci = storage.MongoCollection(self.db, 'parsed_poslanec')
     for entry in poslanci.iterate_all():
         for org, typ in entry[const.POSLANEC_CLENSTVO].items():
             if const.POSLANEC_DELEGACIA.lower() not in org.lower():
                 continue
             yield {
                 const.NEO4J_BEGINNING_ID: entry[const.MONGO_ID],
                 const.NEO4J_ENDING_ID: org,
                 const.CLEN_TYP: const.CLEN_TYP_DICT[typ]
             }
    def store_raw_html(self, entry_id):
        """Fetch the paginated listing for ``entry_id`` and store each page's HTML.

        Pages are requested starting at ``self.base_url.format(entry_id)`` and
        followed through form postbacks until the result grid is missing or a
        page starts with an id that is already known.  Each stored record has
        the keys ``'url'``, ``'html'``, ``'page'`` and ``'id'``.

        Args:
            entry_id: value substituted into ``self.base_url`` and used to
                filter both the edge collection and ``self.collection``.

        NOTE(review): ``rozpravy_ids[0]`` raises ``IndexError`` when the grid
        is present but no header span contains a link; confirm whether that
        can occur.
        """
        ids_collection = storage.MongoCollection(self.db, 'edges_poslanec_rozprava_vystupil')
        # Ending ids of the edges that start at entry_id.
        stored_ids = [
            entry[const.NEO4J_ENDING_ID]
                for entry in ids_collection.get_all({const.NEO4J_BEGINNING_ID: entry_id}, projections={const.NEO4J_ENDING_ID})
        ]

        # Page numbers already present in self.collection for this id.
        entry_pages = [
            entry['page']
                for entry in self.collection.get_all({'id': entry_id}, projections={'page'})
        ]
        # min_page is bound only when pages exist; it is read below under the
        # same `if entry_pages` condition.
        if entry_pages:
            min_page = min(entry_pages)

        url = self.base_url.format(entry_id)
        br = robobrowser.RoboBrowser(parser='html.parser', history=False)
        br.open(url)

        last_page = False
        page = 1
        while True:
            # Stop as soon as the response has no result grid element.
            if not br.parsed.select('#_sectionLayoutContainer_ctl01__resultGrid'):
                break
            # Ids are the text after the last '=' in each header link's href.
            rozpravy_ids = [
                int(span.find('a')['href'].split('=')[-1])
                    for span in br.parsed('span', attrs={'class': 'daily_info_speech_header_right'})
                        if span.find('a')
            ]
            # First id already known: stop without storing this page.
            if rozpravy_ids[0] in stored_ids:
               break
            # Last id already known: store this page, then stop.
            if rozpravy_ids[-1] in stored_ids:
                last_page = True
            # With earlier pages on record, new pages are numbered downward
            # from the smallest stored page number; otherwise 1, 2, 3, ...
            if entry_pages:
                store_page = min_page - page
            else:
                store_page = page
            data = {
                'url': url,
                'html': str(br.parsed),
                'page': store_page,
                'id': entry_id
            }
            # presumably an upsert keyed on ('url', 'page'); verify against
            # storage.MongoCollection.update
            self.collection.update(data, ['url', 'page'])
            sleep(self.conf['scrape']['delay'])
            if last_page:
                break
            page += 1
            # Request the next page by submitting form '_f' with the two event
            # fields added and the search button field removed.
            form = br.get_form(id='_f')
            form.add_field(rbfields.Input(f'<input name="__EVENTARGUMENT" value="Page${page}" />'))
            form.add_field(rbfields.Input('<input name="__EVENTTARGET" value="_sectionLayoutContainer$ctl01$_resultGrid" />'))
            form.fields.pop('_sectionLayoutContainer$ctl01$_searchButton')
            br.submit_form(form)
Exemplo n.º 21
0
 def entry_generator(self):
     """Yield a fixed-field projection of every ``parsed_zakon`` entry.

     Each yielded dict has exactly the nine keys listed below; a field
     missing from the entry is filled with ``const.NEO4J_NULLVALUE``.
     """
     wanted_fields = (
         const.MONGO_ID,
         const.MONGO_TIMESTAMP,
         const.ZAKON_STAV,
         const.ZAKON_VYSLEDOK,
         const.ZAKON_DRUH,
         const.ZAKON_NAZOV,
         const.MONGO_URL,
         const.MONGO_UNIQUE_ID,
         const.ZAKON_DATUM_DORUCENIA,
     )
     zakony = storage.MongoCollection(self.db, 'parsed_zakon')
     for entry in zakony.iterate_all():
         node = {}
         for field in wanted_fields:
             node[field] = entry.get(field, const.NEO4J_NULLVALUE)
         yield node
Exemplo n.º 22
0
 def entry_generator(self):
     """Yield one node per distinct club in the reference vote, with its head count.

     The reference vote is the single ``parsed_hlasovanie`` document returned
     for ``sort=[(const.MONGO_ID, -1)]`` (presumably the one with the highest
     id; confirm against ``storage.MongoCollection.get``).

     Yields:
         dict: ``const.MONGO_ID`` is the club name passed through
         ``utils.parse_klub``, ``const.KLUB_POCET`` the number of individual
         votes carrying that club, as int.
     """
     hlasovania = storage.MongoCollection(self.db, 'parsed_hlasovanie')
     last_entry = hlasovania.get(
         {},
         projection=[const.HLASOVANIE_INDIVIDUALNE],
         sort=[(const.MONGO_ID, -1)])
     individualne = last_entry[const.HLASOVANIE_INDIVIDUALNE]
     kluby = [hlas[const.HLASOVANIE_KLUB] for hlas in individualne.values()]
     nazvy, pocty = np.unique(kluby, return_counts=True)
     for nazov, pocet in zip(nazvy, pocty):
         yield {
             const.MONGO_ID: utils.parse_klub(nazov),
             const.KLUB_POCET: int(pocet)
         }
Exemplo n.º 23
0
 def entry_generator(self):
     """Yield an edge from each ``parsed_rozprava`` entry to every speech it contains.

     The speech's club is looked up in ``const.KLUB_DICT`` under the key
     ``'Klub ' + <club>``; an unknown club becomes ``const.NEO4J_NULLVALUE``.

     Yields:
         dict: keys ``const.NEO4J_BEGINNING_ID`` (entry id),
         ``const.NEO4J_ENDING_ID`` (speech id),
         ``const.ROZPRAVA_POSLANEC_KLUB`` and ``const.ROZPRAVA_POSLANEC_TYP``.
     """
     rozpravy = storage.MongoCollection(self.db, 'parsed_rozprava')
     for entry in rozpravy.iterate_all():
         for vystupenie in entry[const.ROZPRAVA_VYSTUPENIA]:
             klub_key = 'Klub ' + vystupenie[const.ROZPRAVA_POSLANEC_KLUB]
             klub = const.KLUB_DICT.get(klub_key, const.NEO4J_NULLVALUE)
             edge = {
                 const.NEO4J_BEGINNING_ID: entry[const.MONGO_ID],
                 const.NEO4J_ENDING_ID: vystupenie[const.MONGO_ID],
                 const.ROZPRAVA_POSLANEC_KLUB: klub,
                 const.ROZPRAVA_POSLANEC_TYP:
                 vystupenie[const.ROZPRAVA_POSLANEC_TYP]
             }
             yield edge
Exemplo n.º 24
0
 def entry_generator(self):
     """Yield one edge per individual vote recorded in ``parsed_hlasovanie``.

     Yields:
         dict: ``const.NEO4J_BEGINNING_ID`` is the voter key as int,
         ``const.NEO4J_ENDING_ID`` the entry's ``const.MONGO_ID``,
         ``const.HLASOVAL_HLAS`` the vote mapped through
         ``const.HLASOVAL_HLAS_DICT`` and ``const.HLASOVAL_KLUB`` the club
         passed through ``utils.parse_klub``.
     """
     hlasovania = storage.MongoCollection(self.db, 'parsed_hlasovanie')
     for entry in hlasovania.iterate_all():
         individualne = entry[const.HLASOVANIE_INDIVIDUALNE]
         for poslanec_id, poslanec in individualne.items():
             yield {
                 const.NEO4J_BEGINNING_ID: int(poslanec_id),
                 const.NEO4J_ENDING_ID: entry[const.MONGO_ID],
                 const.HLASOVAL_HLAS:
                 const.HLASOVAL_HLAS_DICT[poslanec[const.HLASOVANIE_HLAS]],
                 const.HLASOVAL_KLUB:
                 utils.parse_klub(poslanec[const.HLASOVANIE_KLUB])
             }
Exemplo n.º 25
0
 def entry_generator(self):
     """Yield every speech of every ``parsed_rozprava`` entry as a node dict.

     Each speech dict is modified in place: six fields are removed if
     present, ``const.MONGO_TIMESTAMP`` and ``const.MONGO_URL`` are copied
     from the parent entry, and ``const.ROZPRAVA_DLZKA`` is set to
     ``self.compute_dlzka_vystupenia(start, end)`` using the speech's
     ``const.ROZPRAVA_CAS_ZACIATOK`` and ``const.ROZPRAVA_CAS_KONIEC``.
     """
     removed_fields = (
         const.ROZPRAVA_TLAC,
         const.ROZPRAVA_POSLANEC_ID,
         const.ROZPRAVA_POSLANEC_PRIEZVISKO,
         const.ROZPRAVA_POSLANEC_MENO,
         const.ROZPRAVA_POSLANEC_KLUB,
         const.ROZPRAVA_POSLANEC_TYP,
     )
     inherited_fields = (const.MONGO_TIMESTAMP, const.MONGO_URL)
     rozpravy = storage.MongoCollection(self.db, 'parsed_rozprava')
     for entry in rozpravy.iterate_all():
         for vystupenie in entry[const.ROZPRAVA_VYSTUPENIA]:
             for field in removed_fields:
                 vystupenie.pop(field, None)
             for field in inherited_fields:
                 vystupenie[field] = entry[field]
             zaciatok = vystupenie[const.ROZPRAVA_CAS_ZACIATOK]
             koniec = vystupenie[const.ROZPRAVA_CAS_KONIEC]
             dlzka = self.compute_dlzka_vystupenia(zaciatok, koniec)
             vystupenie[const.ROZPRAVA_DLZKA] = dlzka
             yield vystupenie
Exemplo n.º 26
0
 def entry_generator(self):
     """Yield proposer edges for every ``parsed_zmena`` entry.

     The name under ``const.ZMENA_PREDKLADATEL`` produces one edge tagged
     ``const.NAVRHOL_HLAVNY``; each name under ``const.ZMENA_DALSI`` (absent
     means none) produces one tagged ``const.NAVRHOL_DALSI``.  Names are
     resolved with ``utils.get_poslanec_id``.

     Yields:
         dict: keys ``const.NEO4J_BEGINNING_ID`` (resolved id),
         ``const.NEO4J_ENDING_ID`` (entry id) and
         ``const.NAVRHOL_NAVRHOVATEL`` (the tag).
     """
     zmeny = storage.MongoCollection(self.db, 'parsed_zmena')
     for entry in zmeny.iterate_all():
         hlavny = entry[const.ZMENA_PREDKLADATEL]
         yield {
             const.NEO4J_BEGINNING_ID: utils.get_poslanec_id(self.db, hlavny),
             const.NEO4J_ENDING_ID: entry[const.MONGO_ID],
             const.NAVRHOL_NAVRHOVATEL: const.NAVRHOL_HLAVNY
         }
         dalsi = entry.get(const.ZMENA_DALSI, [])
         for poslanec in dalsi:
             yield {
                 const.NEO4J_BEGINNING_ID:
                 utils.get_poslanec_id(self.db, poslanec),
                 const.NEO4J_ENDING_ID: entry[const.MONGO_ID],
                 const.NAVRHOL_NAVRHOVATEL: const.NAVRHOL_DALSI
             }
Exemplo n.º 27
0
 def entry_generator(self):
     """Yield, per voter, the club edge taken from one ``parsed_hlasovanie`` entry.

     All entries are scanned first.  A voter's stored record is replaced
     unless its ``const.CLEN_NAPOSLEDY`` is strictly greater than the current
     entry's ``const.HLASOVANIE_CAS``, so on equal values the entry seen
     later in iteration order wins.

     Yields:
         dict: ``const.NEO4J_BEGINNING_ID`` is the voter key as int,
         ``const.NEO4J_ENDING_ID`` the club passed through
         ``utils.parse_klub`` and ``const.CLEN_NAPOSLEDY`` the entry's
         ``const.HLASOVANIE_CAS``.
     """
     hlasovania = storage.MongoCollection(self.db, 'parsed_hlasovanie')
     latest = {}
     for entry in hlasovania.iterate_all():
         individualne = entry[const.HLASOVANIE_INDIVIDUALNE]
         for poslanec_id, hlas in individualne.items():
             known = latest.get(poslanec_id)
             if (known is not None and
                     known[const.CLEN_NAPOSLEDY] > entry[const.HLASOVANIE_CAS]):
                 continue
             latest[poslanec_id] = {
                 const.NEO4J_BEGINNING_ID: int(poslanec_id),
                 const.NEO4J_ENDING_ID:
                 utils.parse_klub(hlas[const.HLASOVANIE_KLUB]),
                 const.CLEN_NAPOSLEDY: entry[const.HLASOVANIE_CAS]
             }
     yield from latest.values()
 def create_id_generator(self):
     """Return the result of ``get_all_attribute('id')`` on ``parsed_poslanec``."""
     poslanci = storage.MongoCollection(self.db, 'parsed_poslanec')
     ids = poslanci.get_all_attribute('id')
     return ids
 def __init__(self, db, conf):
     """Open the ``raw_<classname>`` source and ``parsed_<classname>`` target collections.

     Args:
         db: database handle passed to ``storage.MongoCollection``.
         conf: accepted but not used by this initializer.
     """
     super().__init__()
     # Both collection names share the lower-cased concrete class name.
     class_name = type(self).__name__.lower()
     self.source_collection = storage.MongoCollection(db, 'raw_' + class_name)
     self.target_collection = storage.MongoCollection(
         db, 'parsed_' + class_name)
     self.unique_ids = ['id']
Exemplo n.º 30
0
 def entry_generator(self):
     """Yield every ``parsed_poslanec`` entry without ``const.POSLANEC_CLENSTVO``.

     The field is deleted in place; an entry lacking it raises ``KeyError``.
     """
     poslanci = storage.MongoCollection(self.db, 'parsed_poslanec')
     for poslanec in poslanci.iterate_all():
         del poslanec[const.POSLANEC_CLENSTVO]
         yield poslanec