def flow(*_):
    """Build the pipeline for the streaming 'criteria' resource.

    Fetches all reports, stamps each row with publication id 1, coerces
    start_date (day-month-year, dash-separated), and keys rows on
    publication_id.
    """
    steps = (
        get_all_reports(),
        calculate_publication_id(1),
        set_type('start_date', type='date', format='%d-%m-%Y'),
        set_primary_key(['publication_id']),
        update_resource(-1, name='criteria', **{PROP_STREAMING: True}),
    )
    return Flow(*steps)
def flow(*_):
    """Build the pipeline for the streaming 'jobiz' resource.

    Fetches results, parses start_date (dot-separated day.month.year),
    classifies each row via process_kind, assigns publication id 2 and
    keys rows on publication_id.
    """
    pipeline = [
        fetch_results(),
        set_type('start_date', type='date', format='%d.%m.%Y'),
        process_kind,
        calculate_publication_id(2),
        set_primary_key(['publication_id']),
        update_resource(-1, name='jobiz', **{PROP_STREAMING: True}),
    ]
    return Flow(*pipeline)
def flow(*args):
    """Build the pipeline for the streaming 'molsa' resource.

    Fetches results, marks the resource as streaming under the name
    'molsa', stamps publication id 4 and parses claim_date as a
    day/month/year hour:minute datetime.
    """
    streaming_meta = {PROP_STREAMING: True}
    return Flow(
        get_results(),
        update_resource(-1, name='molsa', **streaming_meta),
        calculate_publication_id(4),
        set_type('claim_date', resources='molsa', type='datetime',
                 format='%d/%m/%Y %H:%M'),
    )
def flow(*args):
    """Build the pipeline for Ministry of Health support criteria.

    Runs the m_tmicha scraper, stamps publication id 5 and exposes the
    result as the streaming resource
    'support_criteria_from_ministry_of_health'.
    """
    resource_meta = dict(name='support_criteria_from_ministry_of_health')
    resource_meta[PROP_STREAMING] = True
    return Flow(
        m_tmicha_scraper(),
        calculate_publication_id(5),
        update_resource(-1, **resource_meta),
    )
def flow(*_):
    """Build the pipeline for the streaming 'class_action' resource.

    Scrapes class-action rows, parses claim_date as a day/month/year
    datetime and stamps publication id 8.
    """
    steps = (
        scrape(),
        DF.update_resource(-1, **{'name': 'class_action',
                                  'dpp:streaming': True}),
        DF.set_type('claim_date', type='datetime', format='%d/%m/%Y',
                    resources=-1),
        calculate_publication_id(8),
    )
    return DF.Flow(*steps)
def flow(*args):
    """Build the pipeline for the streaming 'molsa' calls resource.

    Fetches call listings, enriches them with per-call details and the
    resolved ordering unit, stamps publication id 3, normalizes the
    documents field, and prints rows for inspection at the end.
    """
    enrichment = [
        fetch_calls(),
        call_details(),
        resolve_ordering_unit(),
        calculate_publication_id(3),
        fix_documents(),
    ]
    finalization = [
        update_resource(-1, name='molsa', **{PROP_STREAMING: True}),
        printer(),
    ]
    return Flow(*enrichment, *finalization)
def flow(*_):
    """Build the pipeline for the streaming 'btl' resource.

    Scrapes rows, parses claim_date (with time) and start_date as
    day/month/year values, and stamps publication id 7.
    """
    date_steps = (
        DF.set_type('claim_date', type='datetime',
                    format='%d/%m/%Y %H:%M', resources=-1),
        DF.set_type('start_date', type='date',
                    format='%d/%m/%Y', resources=-1),
    )
    return DF.Flow(
        scrape(),
        DF.update_resource(-1, **{'dpp:streaming': True, 'name': 'btl'}),
        *date_steps,
        calculate_publication_id(7),
    )
def flow(*_):
    """Build the pipeline for the streaming 'negev_galil' resource.

    Keeps only pages whose title starts with 'קול קורא', parses
    start_date and claim_date, stamps publication id 9 and validates the
    resulting rows.
    """
    def keep_call_for_bids(row):
        # Same truthiness as the original inline lambda:
        # a missing/empty title filters the row out.
        title = row['page_title']
        return title and title.startswith('קול קורא')

    return DF.Flow(
        scraper(),
        DF.filter_rows(keep_call_for_bids, resources=-1),
        DF.set_type('start_date', type='date', format='%d/%m/%Y',
                    resources=-1),
        DF.set_type('claim_date', type='datetime', format='%d/%m/%Y',
                    resources=-1),
        calculate_publication_id(9),
        DF.validate(),
        DF.update_resource(-1, name='negev_galil', **{PROP_STREAMING: True}),
    )
def flow(*_):
    """Build the pipeline for the streaming 'education' resource.

    Loads the raw jData JSON feed from URL, renames the Hebrew-source
    columns to the common schema, derives contact / audience / budget
    fields from the raw HTML-ish columns, builds the documents array,
    drops the intermediate columns and stamps publication id 6.
    """
    return DF.Flow(
        DF.load(URL, format='json', property='jData', name='education'),
        DF.concatenate(dict(
            page_title=['Title'],
            start_date=['PobKKPublishingDate'],
            claim_date=['PobLastDate'],
            target_audience_x=['PobBudgetEntitties'],
            description=['PobTaktzir'],
            email=['PobPedagogyContactHtml'],
            publishing_unit_x=['PobYechida'],
            budget_code_x=['PobTakanaTaktzivitString'],
            att_title=['PobCreteriaLink_description'],
            att_url=['PobCreteriaLink_url'],
        ), resources=-1, target=dict(name='education')),
        enumerate_titles,
        # Constant metadata fields.
        DF.add_field('page_url', 'string', PAGE_URL, resources=-1),
        DF.add_field('publisher', 'string', 'משרד החינוך', resources=-1),
        DF.add_field('tender_type', 'string', 'call_for_bids', resources=-1),
        DF.add_field('tender_type_he', 'string', 'קול קורא', resources=-1),
        DF.add_field('publication_id', 'integer', 0, resources=-1),
        DF.add_field('tender_id', 'string', '0', resources=-1),
        # NOTE(fix): a second, duplicate add_field('tender_type_he', ...)
        # was removed here — the field is already added above.
        # Derived fields (helpers defined elsewhere in this module).
        DF.add_field('contact', 'string',
                     lambda row: extract_hebrew(row, 'email'), resources=-1),
        DF.add_field('target_audience', 'string',
                     lambda row: extract_hebrew(row, 'target_audience_x'),
                     resources=-1),
        DF.add_field('contact_email', 'string',
                     lambda row: extract_email(row, 'email'), resources=-1),
        DF.add_field('publishing_unit', 'string',
                     lambda row: row['publishing_unit_x'][0]['PobYechida'],
                     resources=-1),
        DF.add_field('budget_code', 'string',
                     lambda row: extract_budget_code(row, 'budget_code_x'),
                     resources=-1),
        DF.set_type('start_date', type='date', format='%d/%m/%Y %H:%M:%S'),
        DF.set_type('claim_date', type='datetime', format='%d/%m/%Y %H:%M:%S'),
        DF.add_field('documents', 'array', lambda row: [dict(
            description=row['att_title'],
            link=row['att_url'],
            update_time=str(row['start_date'])
        )], resources=-1),
        # Drop the intermediate source columns now that everything is derived.
        DF.delete_fields(['email', 'publishing_unit_x', 'budget_code_x',
                          'att_title', 'att_url', 'target_audience_x'],
                         resources=-1),
        calculate_publication_id(6),
        DF.update_resource(-1, **{'dpp:streaming': True}),
    )
def flow(*_):
    """Build the pipeline for the streaming 'negev_galil' resource.

    Keeps only pages titled 'קול קורא…', parses each page, lifts the
    parsed decision/date/document fields into real columns, stamps
    publication id 9 and validates the rows.
    """
    def keep_call_for_bids(row):
        # Same truthiness as the original inline lambda.
        title = row['page_title']
        return title and title.startswith('קול קורא')

    parsed_fields = (
        DF.add_field('decision', 'string',
                     default=lambda row: row['parsed']['decision'],
                     resources=-1),
        DF.add_field('start_date', 'date', format='%d/%m/%Y',
                     default=lambda row: row['parsed']['start_date'],
                     resources=-1),
        DF.add_field('claim_date', 'datetime', format='%d/%m/%Y',
                     default=lambda row: row['parsed']['claim_date'],
                     resources=-1),
        DF.add_field('documents', 'array',
                     default=lambda row: row['parsed']['documents'],
                     resources=-1),
    )
    return DF.Flow(
        scraper(),
        DF.filter_rows(keep_call_for_bids, resources=-1),
        page_parser(),
        *parsed_fields,
        DF.delete_fields(['parsed'], resources=-1),
        calculate_publication_id(9),
        DF.validate(),
        DF.update_resource(-1, name='negev_galil', **{PROP_STREAMING: True}),
    )