def get_updated_sources():
    """Build a flow that loads the source files linked from the news page.

    The page at ``URL`` is fetched and every anchor whose ``href`` contains
    ``'.zip'`` is collected (with ``'#.xlsx'`` appended to the link).

    Returns:
        ``DF.Flow`` of one ``DF.load`` step per collected link when exactly
        two links were found; otherwise a ``DF.Flow`` combining the
        ``data_gov_il_resource`` flows for ``tenders`` and ``exemptions``.
    """
    import requests
    from pyquery import PyQuery as pq
    URL = 'https://mr.gov.il/ilgstorefront/he/news/details/230920201036'

    page = pq(requests.get(URL).text)
    sources = []
    for anchor in page.find('a'):
        href = pq(anchor).attr('href')
        # Anchors without an href attribute yield None; skip them instead of
        # raising TypeError on the substring test.
        if href and '.zip' in href:
            # NOTE(review): the '#.xlsx' fragment presumably hints the file
            # type to the loader -- confirm against DF.load behavior.
            sources.append(href + '#.xlsx')

    # Anything other than exactly two links falls back to the
    # data_gov_il_resource flows.
    if len(sources) != 2:
        return DF.Flow(
            data_gov_il_resource.flow(tenders),
            data_gov_il_resource.flow(exemptions),
        )

    return DF.Flow(*[
        DF.load(source,
                format='excel-xml',
                encoding='utf8',
                bytes_sample_size=0)
        for source in sources
    ])
def flow(*_):
    """Assemble the processing flow that produces 'company-details'.

    Any positional arguments are accepted and ignored. The steps run in the
    order listed: load the companies resource, fix values, concatenate into
    the target resource, cast column types, clear boolean values, mark the
    resource as streaming, set the primary key and print.
    """
    print('reading companies...')

    target_resource = 'company-details'
    steps = [
        data_gov_il_resource.flow(companies),
        fix_values(),
        concatenate(
            _get_columns_mapping_dict(),
            target=dict(name=target_resource),
        ),
        # Column type casts.
        set_type('id', type='string'),
        set_type('company_street_number', type='string'),
        set_type(
            'company_registration_date',
            type='date',
            format='%d/%m/%Y',
        ),
        set_type(
            'company_is_government',
            type='boolean',
            falseValues=['לא'],
            trueValues=['כן'],
        ),
        set_type(
            'company_is_mafera',
            type='boolean',
            falseValues=['לא'],
            trueValues=['מפרה', 'התראה'],
        ),
        set_type('company_last_report_year', type='integer'),
        set_type('company_postal_code', type='string'),
        clear_bool_values,
        # Resource-level metadata.
        update_resource(
            **{'dpp:streaming': True},
            resources=target_resource,
        ),
        set_primary_key(['id'], resources=target_resource),
        printer(),
    ]
    return Flow(*steps)
Exemplo n.º 3
0
def batch_flow(parameters):
    """Combine one flow per entry of ``parameters['batch']`` into a Flow.

    A single object from ``google_chrome_driver()`` is created up front and
    passed to every per-entry flow under the ``gcd`` key, alongside that
    entry's own keys.
    """
    driver = google_chrome_driver()
    sub_flows = []
    for entry in parameters['batch']:
        entry_params = dict(**entry, gcd=driver)
        sub_flows.append(flow(entry_params))
    return Flow(*sub_flows)
Exemplo n.º 4
0
def batch_flow(parameters):
    """Combine one flow per entry of ``parameters['batch']`` into a Flow.

    Each entry is passed unchanged to ``flow``; the results are chained in
    batch order.
    """
    batch = parameters['batch']
    return Flow(*map(flow, batch))