def flow(*_):
    """Assemble the pipeline that produces the 'criteria' resource.

    Positional arguments are accepted and ignored.

    Returns:
        A ``Flow`` built from these steps, in order: ``get_all_reports()``,
        ``calculate_publication_id(1)``, typing of ``start_date`` as a date,
        declaration of ``publication_id`` as primary key, and an update of
        resource ``-1`` that names it 'criteria' and sets ``PROP_STREAMING``.
    """
    steps = [
        get_all_reports(),
        calculate_publication_id(1),
        # The format string expects day-month-year separated by dashes.
        set_type('start_date', type='date', format='%d-%m-%Y'),
        set_primary_key(['publication_id']),
        # Index -1 presumably addresses the last resource -- confirm against
        # the library providing update_resource.
        update_resource(-1, name='criteria', **{PROP_STREAMING: True}),
    ]
    return Flow(*steps)
Пример #2
0
def flow(*_):
    """Assemble the pipeline that produces the 'jobiz' resource.

    Positional arguments are accepted and ignored.

    Returns:
        A ``Flow`` whose steps are, in order: ``fetch_results()``, typing of
        ``start_date`` as a date, ``process_kind`` (passed as an object, not
        called here), ``calculate_publication_id(2)``, declaration of
        ``publication_id`` as primary key, and an update of resource ``-1``
        that names it 'jobiz' and sets ``PROP_STREAMING``.
    """
    steps = (
        fetch_results(),
        # The format string expects day.month.year separated by dots.
        set_type('start_date', type='date', format='%d.%m.%Y'),
        process_kind,
        calculate_publication_id(2),
        set_primary_key(['publication_id']),
        update_resource(-1, name='jobiz', **{PROP_STREAMING: True}),
    )
    return Flow(*steps)
Пример #3
0
def flow(*args):
    """Assemble the pipeline that produces the 'molsa' resource.

    ``args`` is accepted but never read.

    Returns:
        A ``Flow`` of four steps, in order: ``get_results()``, an update of
        resource ``-1`` naming it 'molsa' and setting ``PROP_STREAMING``,
        ``calculate_publication_id(4)``, and typing of ``claim_date`` as a
        datetime on the 'molsa' resource.
    """
    results = get_results()
    named_resource = update_resource(
        -1, name='molsa', **{PROP_STREAMING: True}
    )
    with_publication_id = calculate_publication_id(4)
    # The resource is addressed by the name assigned two steps earlier; the
    # format string expects day/month/year followed by hours:minutes.
    typed_claim_date = set_type(
        'claim_date',
        resources='molsa',
        type='datetime',
        format='%d/%m/%Y %H:%M',
    )
    return Flow(
        results, named_resource, with_publication_id, typed_claim_date
    )
Пример #4
0
def flow(*args):
    """Assemble the 'support_criteria_from_ministry_of_health' pipeline.

    ``args`` is accepted but never read.

    Returns:
        A ``Flow`` of three steps, in order: ``m_tmicha_scraper()``,
        ``calculate_publication_id(5)``, and an update of resource ``-1``
        that sets its name and ``PROP_STREAMING``.
    """
    steps = [
        m_tmicha_scraper(),
        calculate_publication_id(5),
        update_resource(
            -1,
            name='support_criteria_from_ministry_of_health',
            **{PROP_STREAMING: True}
        ),
    ]
    return Flow(*steps)
def flow(*_):
    """Assemble the pipeline that produces the 'class_action' resource.

    Positional arguments are accepted and ignored.

    Returns:
        A ``DF.Flow`` of four steps, in order: ``scrape()``, an update of
        resource ``-1`` setting its name and the 'dpp:streaming' property,
        typing of ``claim_date`` as a datetime, and
        ``calculate_publication_id(8)``.
    """
    scraped = scrape()
    named_resource = DF.update_resource(
        -1, name='class_action', **{'dpp:streaming': True}
    )
    # Typed as datetime although the format string carries a date part only.
    typed_claim_date = DF.set_type(
        'claim_date', type='datetime', format='%d/%m/%Y', resources=-1
    )
    with_publication_id = calculate_publication_id(8)
    return DF.Flow(
        scraped, named_resource, typed_claim_date, with_publication_id
    )
Пример #6
0
def flow(*args):
    """Assemble the calls pipeline that produces the 'molsa' resource.

    ``args`` is accepted but never read.

    Returns:
        A ``Flow`` whose steps are, in order: ``fetch_calls()``,
        ``call_details()``, ``resolve_ordering_unit()``,
        ``calculate_publication_id(3)``, ``fix_documents()``, an update of
        resource ``-1`` naming it 'molsa' and setting ``PROP_STREAMING``,
        and ``printer()``.
    """
    steps = [
        fetch_calls(),
        call_details(),
        resolve_ordering_unit(),
        calculate_publication_id(3),
        fix_documents(),
        update_resource(-1, name='molsa', **{PROP_STREAMING: True}),
        # NOTE(review): printer() looks like a debugging aid left in the
        # chain -- confirm it is wanted in production runs.
        printer(),
    ]
    return Flow(*steps)
Пример #7
0
def flow(*_):
    """Assemble the pipeline that produces the 'btl' resource.

    Positional arguments are accepted and ignored.

    Returns:
        A ``DF.Flow`` of five steps, in order: ``scrape()``, an update of
        resource ``-1`` setting 'dpp:streaming' and the name 'btl', typing
        of ``claim_date`` as a datetime, typing of ``start_date`` as a date,
        and ``calculate_publication_id(7)``.
    """
    # Key order is kept as-is so the properties are passed in the same order.
    resource_props = {'dpp:streaming': True, 'name': 'btl'}
    steps = [
        scrape(),
        DF.update_resource(-1, **resource_props),
        # claim_date carries a time of day; start_date is a plain date.
        DF.set_type(
            'claim_date',
            type='datetime',
            format='%d/%m/%Y %H:%M',
            resources=-1,
        ),
        DF.set_type(
            'start_date', type='date', format='%d/%m/%Y', resources=-1
        ),
        calculate_publication_id(7),
    ]
    return DF.Flow(*steps)
def flow(*_):
    """Assemble the pipeline that produces the 'negev_galil' resource.

    Positional arguments are accepted and ignored.

    Returns:
        A ``DF.Flow`` whose steps are, in order: ``scraper()``, a row filter
        on ``page_title``, typing of ``start_date`` (date) and
        ``claim_date`` (datetime), ``calculate_publication_id(9)``,
        ``DF.validate()``, and an update of resource ``-1`` naming it
        'negev_galil' and setting ``PROP_STREAMING``.
    """
    def _has_expected_title_prefix(row):
        # Rows with a falsy title are rejected; otherwise the title must
        # start with the fixed Hebrew prefix.
        title = row['page_title']
        return title and title.startswith('קול קורא')

    date_format = '%d/%m/%Y'
    steps = [
        scraper(),
        DF.filter_rows(_has_expected_title_prefix, resources=-1),
        DF.set_type(
            'start_date', type='date', format=date_format, resources=-1
        ),
        DF.set_type(
            'claim_date', type='datetime', format=date_format, resources=-1
        ),
        calculate_publication_id(9),
        DF.validate(),
        DF.update_resource(-1, name='negev_galil', **{PROP_STREAMING: True}),
    ]
    return DF.Flow(*steps)
Пример #9
0
def flow(*_):
    """Assemble the pipeline that produces the 'education' resource.

    Positional arguments are accepted and ignored.

    The chain loads JSON from ``URL`` (property 'jData'), maps the source
    columns onto the pipeline's field names, runs ``enumerate_titles``, adds
    constant and derived fields, types the two date columns, builds a
    one-element ``documents`` array per row, drops the intermediate columns,
    runs ``calculate_publication_id(6)`` and finally sets 'dpp:streaming' on
    resource ``-1``.

    Returns:
        The assembled ``DF.Flow``.
    """
    return DF.Flow(
        DF.load(URL, format='json', property='jData', name='education'),
        # DF.checkpoint('education'),
        # Map source column names (right) onto target field names (left).
        # Names ending in ``_x`` are intermediates deleted further down.
        DF.concatenate(dict(
            page_title=['Title'],
            start_date=['PobKKPublishingDate'],
            claim_date=['PobLastDate'],
            target_audience_x=['PobBudgetEntitties'],
            description=['PobTaktzir'],
            email=['PobPedagogyContactHtml'],
            publishing_unit_x=['PobYechida'],
            budget_code_x=['PobTakanaTaktzivitString'],
            att_title=['PobCreteriaLink_description'],
            att_url=['PobCreteriaLink_url'],
        ), resources=-1, target=dict(name='education')),
        enumerate_titles,
        # Constant-valued fields.
        DF.add_field('page_url', 'string', PAGE_URL, resources=-1),
        DF.add_field('publisher', 'string', 'משרד החינוך', resources=-1),
        DF.add_field('tender_type', 'string', 'call_for_bids', resources=-1),
        # 'tender_type_he' is added exactly once; a second add_field with the
        # same name would declare the field twice in the resource schema.
        DF.add_field('tender_type_he', 'string', 'קול קורא', resources=-1),
        DF.add_field('publication_id', 'integer', 0, resources=-1),
        DF.add_field('tender_id', 'string', '0', resources=-1),
        # Fields derived from the intermediate columns.
        DF.add_field('contact', 'string', lambda row: extract_hebrew(row, 'email'), resources=-1),
        DF.add_field('target_audience', 'string', lambda row: extract_hebrew(row, 'target_audience_x'), resources=-1),
        DF.add_field('contact_email', 'string', lambda row: extract_email(row, 'email'), resources=-1),
        # Reads the first element's 'PobYechida' entry; this raises if the
        # list is empty -- presumably the source always supplies one.
        DF.add_field('publishing_unit', 'string', lambda row: row['publishing_unit_x'][0]['PobYechida'], resources=-1),
        DF.add_field('budget_code', 'string', lambda row: extract_budget_code(row, 'budget_code_x'), resources=-1),
        DF.set_type('start_date', type='date', format='%d/%m/%Y %H:%M:%S'),
        DF.set_type('claim_date', type='datetime', format='%d/%m/%Y %H:%M:%S'),
        # One document per row, built from the attachment columns;
        # update_time stringifies whatever start_date holds at this point.
        DF.add_field('documents', 'array',
                     lambda row: [dict(
                         description=row['att_title'],
                         link=row['att_url'],
                         update_time=str(row['start_date'])
                     )], resources=-1),
        DF.delete_fields(['email', 'publishing_unit_x', 'budget_code_x', 'att_title', 'att_url', 'target_audience_x'], resources=-1),
        calculate_publication_id(6),
        DF.update_resource(-1, **{'dpp:streaming': True})
    )
def flow(*_):
    """Assemble the page-parsing pipeline for the 'negev_galil' resource.

    Positional arguments are accepted and ignored.

    Returns:
        A ``DF.Flow`` whose steps are, in order: ``scraper()``, a row filter
        on ``page_title``, ``page_parser()``, four fields copied out of each
        row's ``parsed`` mapping (``decision``, ``start_date``,
        ``claim_date``, ``documents``), removal of ``parsed``,
        ``calculate_publication_id(9)``, ``DF.validate()``, and an update of
        resource ``-1`` naming it 'negev_galil' and setting
        ``PROP_STREAMING``.
    """
    def _has_expected_title_prefix(row):
        # Rows with a falsy title are rejected; otherwise the title must
        # start with the fixed Hebrew prefix.
        title = row['page_title']
        return title and title.startswith('קול קורא')

    def _from_parsed(key):
        # Build a per-row getter for one entry of the 'parsed' mapping.
        return lambda row: row['parsed'][key]

    steps = [
        scraper(),
        DF.filter_rows(_has_expected_title_prefix, resources=-1),
        page_parser(),
        DF.add_field(
            'decision', 'string',
            default=_from_parsed('decision'), resources=-1,
        ),
        DF.add_field(
            'start_date', 'date', format='%d/%m/%Y',
            default=_from_parsed('start_date'), resources=-1,
        ),
        DF.add_field(
            'claim_date', 'datetime', format='%d/%m/%Y',
            default=_from_parsed('claim_date'), resources=-1,
        ),
        DF.add_field(
            'documents', 'array',
            default=_from_parsed('documents'), resources=-1,
        ),
        # The source mapping is dropped once its values have been copied out.
        DF.delete_fields(['parsed'], resources=-1),
        calculate_publication_id(9),
        DF.validate(),
        DF.update_resource(-1, name='negev_galil', **{PROP_STREAMING: True}),
    ]
    return DF.Flow(*steps)