def parsed_items():
    """Parse the SSA 5 minutes fixture with the clock frozen at 2018-10-12 12:00:00.

    The current-year page fixture is parsed first and its result is attached
    to the minutes response as ``meta['items']`` before the minutes page is
    parsed.

    Returns:
        list: Every item yielded by ``spider._parse_minutes``.
    """
    # The context manager stops the freezer even when parsing raises, so a
    # failure here cannot leave the clock frozen for whatever runs next.
    with freeze_time('2018-10-12 12:00:00'):
        minutes_req = file_response('files/chi_ssa_5_minutes.html')
        minutes_req.meta['items'] = spider._parse_current_year(
            file_response('files/chi_ssa_5.html')
        )
        # Materialize inside the block so any lazy iteration happens while
        # time is still frozen.
        return list(spider._parse_minutes(minutes_req))
def parsed_items():
    """Parse the SSA 42 main page and minutes page with the clock frozen at 2018-11-07.

    Returns:
        list: Items parsed from the main page with ``upcoming=True``,
        followed by the items parsed from the minutes page.
    """
    # The context manager guarantees the freezer is stopped even when one of
    # the parsing steps raises.
    with freeze_time('2018-11-07'):
        spider = ChiSsa42Spider()
        res = file_response('files/chi_ssa_42.html')
        minutes_res = file_response('files/chi_ssa_42_minutes.html')
        # Both lists are built inside the block so iteration happens under
        # the frozen clock; main-page items come first, as before.
        upcoming_items = list(spider._parse_items(res, upcoming=True))
        minutes_items = list(spider._parse_items(minutes_res))
        return upcoming_items + minutes_items
def parsed_items():
    """Parse the calendar fixture, then one detail fixture per calendar item.

    The clock is frozen at 2018-10-31 for the whole run. Each item yielded by
    ``spider._parse_calendar`` is attached to a fresh detail response as
    ``meta['item']`` and passed through ``spider._parse_detail``.

    Returns:
        list: One ``_parse_detail`` result per calendar item, in calendar order.
    """
    # Using the context manager means the freezer is always stopped, even if
    # the calendar or a detail page fails to parse.
    with freeze_time('2018-10-31'):
        spider = ChiLowIncomeHousingTrustFundSpider()
        cal_res = file_response('files/chi_low_income_housing_trust_fund.html')
        detailed_items = []
        for item in spider._parse_calendar(cal_res):
            # A new response object per item keeps each item's meta separate.
            detail_res = file_response(
                'files/chi_low_income_housing_trust_fund_detail.html'
            )
            detail_res.meta['item'] = item
            detailed_items.append(spider._parse_detail(detail_res))
        return detailed_items
def parsed_item():
    """Parse the city council event fixture with the clock frozen at 2018-01-01 12:00:01.

    Returns:
        The value returned by ``spider._parse_item`` for the fixture response.
    """
    # The context manager stops the freezer on every exit path, including an
    # exception raised while loading or parsing the fixture.
    with freeze_time('2018-01-01 12:00:01'):
        item = file_response('files/chi_citycouncil_event.json', url=INITIAL_REQUEST)
        return spider._parse_item(item)
def parsed_items():
    """Parse the SSA 17 fixture with the clock frozen at 2018-11-07.

    Returns:
        list: Every item yielded by ``spider.parse`` for the fixture response.
    """
    # The context manager stops the freezer even when parsing raises, so the
    # frozen clock cannot leak out of this function.
    with freeze_time('2018-11-07'):
        spider = ChiSsa17Spider()
        res = file_response('files/chi_ssa_17.html')
        # Materialize inside the block so iteration runs under frozen time.
        return list(spider.parse(res))
def test_gen_requests():
    """The saved HACC feed should produce the six expected event API URLs, in order."""
    hacc_spider = Cook_housingAuthoritySpider()
    feed_response = file_response('files/hacc_feed.txt',
                                  'http://thehacc.org/events/feed/')
    generated = list(hacc_spider._gen_requests(feed_response))
    expected_urls = [
        'http://thehacc.org/wp-json/tribe/events/v1/events/2836',
        'http://thehacc.org/wp-json/tribe/events/v1/events/2816',
        'http://thehacc.org/wp-json/tribe/events/v1/events/2650',
        'http://thehacc.org/wp-json/tribe/events/v1/events/2882',
        'http://thehacc.org/wp-json/tribe/events/v1/events/2858',
        'http://thehacc.org/wp-json/tribe/events/v1/events/2879',
    ]
    assert generated == expected_urls
def test_gen_html_filenames(monkeypatch):
    """``_gen_html`` should return one file path per start URL under ``files/``.

    ``scripts.generate_spider._fetch_url`` is patched with a mock that hands
    back the three saved ``.example`` fixtures in order, so no network access
    is needed.

    Args:
        monkeypatch: pytest's monkeypatch fixture, used to swap ``_fetch_url``.
    """
    files_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'files')
    test_filenames = [
        files_dir + '/testspider_articles.html',
        files_dir + '/testspider_staff.html',
        files_dir + '/testspider_is-chicago-any-less-segregated.html',
    ]

    patched_fetch_url = Mock()
    patched_fetch_url.side_effect = [
        file_response('files/testspider_articles.html.example'),
        file_response('files/testspider_staff.html.example'),
        file_response(
            'files/testspider_is-chicago-any-less-segregated.html.example'),
    ]
    monkeypatch.setattr('scripts.generate_spider._fetch_url', patched_fetch_url)

    rendered_filenames = generate_spider._gen_html(SPIDER_NAME,
                                                   SPIDER_START_URLS,
                                                   session=session)
    try:
        assert rendered_filenames == test_filenames
    finally:
        # Remove the generated files even when the assertion fails, so a
        # failing run does not leave stray fixtures behind.
        for f in rendered_filenames:
            os.remove(f)
def test_gen_html_content(monkeypatch):
    """The rendered articles page should keep the ``<title>`` of the source fixture.

    ``scripts.generate_spider._fetch_url`` is patched to return the saved
    ``.example`` fixture, ``_gen_html`` is run for the first start URL only,
    and the titles of the fixture and the rendered file are compared.

    Args:
        monkeypatch: pytest's monkeypatch fixture, used to swap ``_fetch_url``.
    """
    patched_fetch_url = Mock()
    patched_fetch_url.return_value = file_response(
        'files/testspider_articles.html.example')
    monkeypatch.setattr('scripts.generate_spider._fetch_url', patched_fetch_url)

    rendered_filenames = generate_spider._gen_html(SPIDER_NAME,
                                                   [SPIDER_START_URLS[0]],
                                                   session=session)
    try:
        test_file_content = read_test_file_content(
            'files/testspider_articles.html.example')
        rendered_content = read_test_file_content('files/testspider_articles.html')

        test_dom = fromstring(test_file_content)
        rendered_dom = fromstring(rendered_content)

        test_title = test_dom.xpath('//title')[0].text
        rendered_title = rendered_dom.xpath('//title')[0].text

        assert test_title == rendered_title
    finally:
        # Clean up the generated files whether the comparison passed, failed,
        # or a read/parse step raised before the assertion.
        for f in rendered_filenames:
            os.remove(f)
from datetime import date, time

import pytest
from tests.utils import file_response

from city_scrapers.constants import CANCELED
from city_scrapers.spiders.det_great_lakes_water_authority import DetGreatLakesWaterAuthoritySpider

# Saved events page handed to the spider's ``parse`` entry point.
test_response = file_response(
    'files/det_great_lakes_water_authority.html',
    'http://www.glwater.org/events/',
)
spider = DetGreatLakesWaterAuthoritySpider()
requests = list(spider.parse(test_response))

# Saved iCal export parsed through ``_parse_ical``.
test_ics_response = file_response(
    'files/det_great_lakes_water_authority.ics',
    'http://www.glwater.org/events/?ical=1&tribe_display=month',
)
# Items are ordered by start date, then start time.
parsed_items = sorted(
    spider._parse_ical(test_ics_response),
    key=lambda entry: (entry['start']['date'], entry['start']['time']),
)


def test_requests():
    """Parsing the events page should yield exactly two requests with the expected URLs."""
    yielded = list(spider.parse(test_response))
    yielded_urls = {req.url for req in yielded}
    # spider should yield ical for month + request for next month calendar
    expected_urls = {
        'http://www.glwater.org/events/?ical=1&tribe_display=month',
        'http://www.glwater.org/events/2018-08/',
    }
    assert len(yielded) == 2
    assert yielded_urls == expected_urls
from datetime import date, time

import pytest
from freezegun import freeze_time
from tests.utils import file_response

from city_scrapers.spiders.wayne_economic_development import WayneEconomicDevelopmentSpider

# The clock is frozen at 2018-03-27 12:00:01 while the fixture is loaded and
# parsed, and released again before any test runs.
freezer = freeze_time('2018-03-27 12:00:01')
freezer.start()
test_response = file_response(
    'files/wayne_economic-development.html',
    url=
    'https://www.waynecounty.com/elected/commission/economic-development.aspx')
spider = WayneEconomicDevelopmentSpider()
# Keep only the dict results of ``parse``; anything else it yields is dropped.
parsed_items = [
    item for item in spider.parse(test_response) if isinstance(item, dict)
]
freezer.stop()


# PARAMETRIZED TESTS


@pytest.mark.parametrize('item', parsed_items)
def test_event_description(item):
    """Every parsed item should have an empty event description."""
    assert item['event_description'] == ''


# NOTE(review): the definition below is cut off in this view; left untouched.
@pytest.mark.parametrize('item', parsed_items)
def test_location(item):
    expected_location = ({
# -*- coding: utf-8 -*- import pytest from tests.utils import file_response from documenters_aggregator.spiders.il_pubhealth import Il_pubhealthSpider test_response = file_response('files/il_pubhealth.html') spider = Il_pubhealthSpider() parsed_items = [item for item in spider.parse(test_response) if isinstance(item, dict)] def test_name(): assert parsed_items[2]['name'] == 'PAC: Maternal Mortality Review Committee Meeting' def test_description(): assert parsed_items[1]['description'] == """CONFERENCE ROOMS 122 S. Michigan, 7th Floor, Chicago, Room 711 535 West Jefferson St., 5th Floor, Springfield Conference Call Information Conference Call-In#: 888.494.4032 Access Code: 6819028741 Interested persons may contact the Office of Women’s Health at 312-814-4035 for information""" def test_start_time(): assert parsed_items[4]['start_time'] == '2017-08-09T11:00:00-05:00' def test_end_time():
import pytest # Shared properties between two different page / meeting types import scrapy from freezegun import freeze_time from city_scrapers.spiders.det_brownfield_redevelopment_authority import DetBrownfieldRedevelopmentAuthoritySpider from tests.utils import file_response LOCATION = {'neighborhood': '', 'name': 'DEGC, Guardian Building', 'address': '500 Griswold, Suite 2200, Detroit'} DBRA = 'Board of Directors' DBRA_CAC = 'Community Advisory Committee' test_response = file_response('files/det_brownfield_redevelopment_authority.html', 'http://www.degc.org/public-authorities/dbra/') freezer = freeze_time('2018-07-28 12:00:01') spider = DetBrownfieldRedevelopmentAuthoritySpider() freezer.start() parsed_items = [item for item in spider._next_meeting(test_response) if isinstance(item, dict)] freezer.stop() def test_initial_request_count(): items = list(spider.parse(test_response)) assert len(items) == 5 urls = {r.url for r in items if isinstance(r, scrapy.Request)} assert urls == { 'http://www.degc.org/public-authorities/dbra/fy-2018-2019-notices-agendas-and-minutes/', 'http://www.degc.org/public-authorities/dbra/fy-2017-2018-meetings/', 'http://www.degc.org/public-authorities/dbra/dbra-fy-2016-2017-meetings/'
import pytest

from tests.utils import file_response
from documenters_aggregator.spiders.chi_pubhealth import Chi_pubhealthSpider

# Saved Board of Health page, parsed once at import time.
test_response = file_response(
    'files/chi_pubhealth.html',
    url='https://www.cityofchicago.org/city/en/depts/cdph/supp_info/boh/2017-board-of-health.html',
)
spider = Chi_pubhealthSpider()
# Only dict results of ``parse`` are kept.
parsed_items = [
    parsed for parsed in spider.parse(test_response)
    if isinstance(parsed, dict)
]


def test_name():
    """The first parsed item should be named 'Board of Health Meeting'."""
    actual = parsed_items[0]['name']
    assert actual == 'Board of Health Meeting'


def test_description():
    """The first parsed item should carry the expected description text."""
    actual = parsed_items[0]['description']
    assert actual == 'The Chicago Board of Health is scheduled to meet on the third Wednesday of each month from 9:00am-10:30am. The meetings are held at the Chicago Department of Public Health, DePaul Center, 333 S. State Street, 2nd Floor Board Room.'


def test_start_time():
    """The first parsed item should start at the expected ISO timestamp."""
    actual = parsed_items[0]['start_time']
    assert actual == '2017-01-18T09:00:00-06:00'


def test_end_time():
    """The first parsed item should end at the expected ISO timestamp."""
    actual = parsed_items[0]['end_time']
    assert actual == '2017-01-18T10:30:00-06:00'
from datetime import date, time

import pytest
from freezegun import freeze_time
from tests.utils import file_response

from city_scrapers.spiders.det_charter_school_boards import DetCharterSchoolBoardsSpider

test_response = file_response(
    'files/det_charter_school_boards.html',
    'http://detroitk12.org/admin/charter_schools/boards/')
# The clock is frozen at 2018-08-15 12:00:01 while the spider is built and
# the fixture is parsed, then released before any test runs.
freezer = freeze_time('2018-08-15 12:00:01')
freezer.start()
spider = DetCharterSchoolBoardsSpider()
# Keep only the dict results of ``parse``.
parsed_items = [
    item for item in spider.parse(test_response) if isinstance(item, dict)
]
freezer.stop()


def test_event_count():
    """The number of parsed items should equal the sum of the two expected counts."""
    non_calendar_events = 8
    calendar_events = 81
    assert len(parsed_items) == non_calendar_events + calendar_events


# NOTE(review): the assertion below is cut off in this view (it ends on a line
# continuation); the function is left untouched.
def test_starts_with_day_of_week():
    # page is just one big block of text with meeting details nested in
    # p tags. Can id meetings by finding p tag text that starts with DoW
    assert spider._startswith_day_of_week() == \
from datetime import date, time

import pytest
from tests.utils import file_response

from city_scrapers.spiders.chi_landmark_commission import ChiLandmarkCommissionSpider

# Saved landmarks commission page, parsed once at import time.
test_response = file_response(
    'files/chi_landmark_commission_landmarks_commission.html',
    'https://www.cityofchicago.org/city/en/depts/dcd/supp_info/landmarks_commission.html'
)
spider = ChiLandmarkCommissionSpider()
# Keep only the dict results of ``parse``.
parsed_items = [
    item for item in spider.parse(test_response) if isinstance(item, dict)
]


def test_meeting_count():
    """The fixture should produce 123 parsed meetings."""
    # 12 mtgs / yr * 10 yrs + 3 extra (May 2018, March 2014, June 2013)
    assert len(parsed_items) == 123


def test_unique_id_count():
    """Every parsed meeting should have a distinct ``id`` (123 unique values)."""
    assert len(set([item['id'] for item in parsed_items])) == 123


def test__type():
    """The first parsed item should have ``_type`` equal to 'event'."""
    assert parsed_items[0]['_type'] == 'event'


# NOTE(review): the definition below is cut off in this view; left untouched.
def test_name():
# -*- coding: utf-8 -*- import pytest from tests.utils import file_response from city_scrapers.spiders.chi_schools import Chi_schoolsSpider test_response = file_response('files/cpsboe.html', url='http://www.cpsboe.org/meetings/planning-calendar') spider = Chi_schoolsSpider() parsed_items = [item for item in spider.parse(test_response) if isinstance(item, dict)] def test_event_count(): assert len(parsed_items) == 14 @pytest.mark.parametrize('item', parsed_items) def test_type(item): assert item['_type'] == 'event' # def test_id(): # assert parsed_items[0]['id'] == 'chi_schools/201707261030/x/monthly_board_meeting' @pytest.mark.parametrize('item', parsed_items) def test_name(item): assert item['name'] == 'Monthly Board Meeting' @pytest.mark.parametrize('item', parsed_items) def test_timezone(item):
from datetime import date, time

import pytest
from tests.utils import file_response
from freezegun import freeze_time

from city_scrapers.constants import PASSED, TENTATIVE, COMMISSION
from city_scrapers.spiders.chi_ssa_14 import ChiSsa14Spider

test_response = file_response('files/chi_ssa_14.html')
spider = ChiSsa14Spider()

# The clock is frozen at 2018-10-12 12:00:00 only while ``parse`` is consumed.
freezer = freeze_time('2018-10-12 12:00:00')
freezer.start()
# Keep only the dict results of ``parse``.
parsed_items = [
    item for item in spider.parse(test_response) if isinstance(item, dict)
]
freezer.stop()


def test_start():
    """The first parsed item should start on 2018-04-25 at 19:00 with an empty note."""
    assert parsed_items[0]['start'] == {
        'date': date(2018, 4, 25),
        'time': time(19, 0),
        'note': ''
    }


# NOTE(review): the definition below is cut off in this view; left untouched.
def test_id():
from datetime import date, time

import pytest
from freezegun import freeze_time
from tests.utils import file_response

from city_scrapers.constants import COMMISSION, TENTATIVE
from city_scrapers.spiders.det_city_planning import DetCityPlanningSpider

# The clock is frozen at 2018-12-27 while the fixture is loaded, parsed and
# sorted, then released before any test runs.
freezer = freeze_time('2018-12-27')
freezer.start()
test_response = file_response(
    'files/det_city_planning.html',
    url=
    'https://www.detroitmi.gov/Government/Boards/City-Planning-Commission-Meetings'
)
spider = DetCityPlanningSpider()
# Keep only the dict results of ``parse``.
parsed_items = [
    item for item in spider.parse(test_response) if isinstance(item, dict)
]
# Order the items by start date so index 0 is the earliest one.
parsed_items = sorted(parsed_items, key=lambda x: x['start']['date'])
freezer.stop()


def test_name():
    """The earliest parsed item should be named 'City Planning Commission'."""
    assert parsed_items[0]['name'] == 'City Planning Commission'


# NOTE(review): the definition below is cut off in this view; left untouched.
def test_description():
import pytest
from datetime import date, time

from tests.utils import file_response
from city_scrapers.constants import COMMITTEE, CONFIRMED
from city_scrapers.spiders.cook_landbank import CookLandbankSpider

# Saved JSON fixture; ``test_response`` is a second name for the same object.
file = file_response('files/cook_landbank.json')
spider = CookLandbankSpider()

test_response = file
# Everything ``parse`` yields is kept (no type filtering here).
parsed_items = list(spider.parse(test_response))


def test_name():
    """The first parsed item should carry the expected committee name."""
    assert parsed_items[0]['name'] == 'CCLBA Land Transactions Committee'


def test_event_description():
    """The first parsed item should carry the expected event description text."""
    assert parsed_items[0]['event_description'] == (
        'The Land Transactions Committee will convene on Friday, September '
        '14th at the hour of 10:00 AM at the location of 69 W. Washington '
        'St., 22nd Floor, Conference Room ‘B”, Chicago, Illinois, to consider '
        'the following:'
    )


# NOTE(review): the expected dict below is cut off in this view; the function
# is left untouched.
def test_start():
    EXPECTED_START = {
        'date': date(2018, 9, 14),
        'time': time(10, 00),
        'note': ''
import pytest
import scrapy
from datetime import datetime

from tests.utils import file_response
from documenters_aggregator.spiders.chi_buildings import Chi_buildingsSpider

test_json_response = file_response('files/chi_buildings.json')
test_event_response = file_response('files/chi_buildings.html')
spider = Chi_buildingsSpider()
# Setting spider date to time test files were generated
spider.calendar_date = datetime(2018, 2, 18)


class MockRequest(object):
    """Stand-in for ``scrapy.Request`` that exposes ``meta['item']`` by key."""

    meta = {}

    def __getitem__(self, key):
        """Look ``key`` up in the dict stored under ``meta['item']``."""
        stored_item = self.meta['item']
        return stored_item.get(key)


def mock_request(*args, **kwargs):
    """Build a ``MockRequest`` with an empty item, ignoring every argument."""
    stub = MockRequest()
    stub.meta = {'item': {}}
    return stub


# Swap the stub factory in for scrapy.Request before the spider is run below.
scrapy.Request = mock_request

parsed_items = list(spider.parse(test_json_response))
parsed_event = spider._parse_event(test_event_response)
import pytest
from datetime import date, time

from tests.utils import file_response
from city_scrapers.spiders.chi_ward_night import ChiWardNightSpider, Calendar

test_response = file_response('files/test_chi_ward_night.json')
# The spider is constructed with an explicit start date of 2017-11-01.
spider = ChiWardNightSpider(start_date=date(2017, 11, 1))
# Keep only the dict results of ``parse``.
parsed_items = [
    item for item in spider.parse(test_response) if isinstance(item, dict)
]


def test_id():
    """The first parsed item should carry the expected id string."""
    assert parsed_items[0]['id'] == (
        'chi_ward_night/201711071600/x/ward_night_ward_1'
    )


def test_name():
    """The first parsed item should be named 'Ward Night: Ward 1'."""
    assert parsed_items[0]['name'] == 'Ward Night: Ward 1'


# NOTE(review): the expected string below is cut off in this view; the
# function is left untouched.
def test_event_description():
    assert parsed_items[0]['event_description'] == (
        'Ward Night with Ald. Joe Moreno (Ward 1)\n\n'
        'Frequency: Weekly\n'
        'Day of the Week: Tuesday\n'
        'Requires Sign-Up: No\n'
        'Phone: 773.278.0101\n'
        'Email: [email protected]\n'
from tests.utils import file_response
from city_scrapers.spiders.cook_county import Cook_countySpider

# Saved single-event page, parsed once at import time.
test_response = file_response(
    'files/cook_county_event.html',
    url='https://www.cookcountyil.gov/event/cook-county-zoning-building-committee-6',
)
spider = Cook_countySpider()
item = spider._parse_event(test_response)


def test_name():
    """The parsed event should be named 'ZBA Public Hearing'."""
    actual = item['name']
    assert actual == 'ZBA Public Hearing'


def test_start_time():
    """The parsed event's start time should serialize to the expected ISO string."""
    actual = item['start_time'].isoformat()
    assert actual == '2017-11-15T13:00:00-06:00'


def test_end_time():
    """The parsed event's end time should serialize to the expected ISO string."""
    actual = item['end_time'].isoformat()
    assert actual == '2017-11-15T15:00:00-06:00'


# def test_id():
#    assert item['id'] == 'cook_county/201711151300/x/zba_public_hearing'


def test_all_day():
    """The parsed event's ``all_day`` flag should be exactly ``False``."""
    flag = item['all_day']
    assert flag is False
# -*- coding: utf-8 -*-
import pytest

from tests.utils import file_response
from documenters_aggregator.spiders.cook_hospitals import Cook_hospitalsSpider

# Saved board/committee meetings page, parsed once at import time.
test_response = file_response(
    'files/cook_hospitals.html',
    url='http://www.cookcountyhhs.org/about-cchhs/governance/board-committee-meetings/',
)
spider = Cook_hospitalsSpider()
# Only dict results of ``parse`` are kept.
parsed_items = [
    parsed for parsed in spider.parse(test_response)
    if isinstance(parsed, dict)
]


def test_name():
    """The first parsed item should carry the expected meeting name."""
    actual = parsed_items[0]['name']
    assert actual == 'Meetings of the Board of Directors'


def test_description():
    """The first parsed item's description should be the expected agenda URL."""
    actual = parsed_items[0]['description']
    assert actual == 'http://www.cookcountyhhs.org/wp-content/uploads/2016/01/01-27-17-Board-Agenda.pdf'


def test_start_time():
    """The first parsed item's start time should serialize to the expected ISO string."""
    actual = parsed_items[0]['start_time'].isoformat()
    assert actual == '2017-01-27T09:00:00-06:00'
import calendar
from datetime import datetime
from pytz import timezone

from tests.utils import file_response
from documenters_aggregator.spiders.cook_pubhealth import Cook_pubhealthSpider

# Saved single-event page (event 321), parsed once at import time.
test_response = file_response(
    'files/cook_pubhealth_321.html',
    'http://www.cookcountypublichealth.org/events-view/321')
spider = Cook_pubhealthSpider()
item = spider.parse_event_page(test_response)


def test_name():
    """The parsed event should carry the expected market name."""
    assert item['name'] == 'Fresh Food Market: Robbins Health Center'


# NOTE(review): only the construction of the expected text is visible here;
# the comparison presumably follows outside this view, so the function is
# left untouched.
def test_description():
    expected_description = (
        'This summer, Fresh Food Markets will be hosted weekly, '
        '10:00 a.m. - 2:00 p.m. at three Cook County Health and Hospital System health '
        'centers in South Suburban Cook County.Robbins Health Center, 13450 W. Kedzie., Robbins.'
        'Black Oaks Center, a local nonprofit, is partnering with CCHHS to make fresh fruits and '
        'vegetables available for sale at CCHHS health centers. Cash, credit, and Link cards '
        '(SNAP/EBT/food stamps) are accepted as forms of payment. Persons using their SNAP/Link '
        'card benefits to purchase will receive a Link Match Coupon, as part of the Link Up '
        'Illinois program, good towards the next purchase of fruits and/or vegetables.If you '
        'have Medicaid or receive a medical card, you may be eligible for SNAP. Our partners '
        'at the Greater Chicago Food Depository can assist with SNAP applications. Visit their'
        'websiteor call 773-843-5416 to reach their Benefits Outreach team.')
import pytest

from tests.utils import file_response
from city_scrapers.spiders.metra_board import Metra_boardSpider

# Saved board page, parsed once at import time.
test_response = file_response('files/metra_board.html')
spider = Metra_boardSpider()
# Keep only the dict results of ``parse``.
parsed_items = [
    item for item in spider.parse(test_response) if isinstance(item, dict)
]


def test_name():
    """The first parsed item should carry the expected meeting name."""
    assert parsed_items[0]['name'] == 'Metra February 2018 Board Meeting'


def test_start_time():
    """The first parsed item's start time should serialize to the expected ISO string."""
    assert parsed_items[0]['start_time'].isoformat(
    ) == '2018-02-21T10:30:00-06:00'


# def test_id():
#    assert parsed_items[0]['id'] == 'metra_board/201802211030/x/metra_february_2018_board_meeting'


# NOTE(review): the expected dict below is cut off in this view; the function
# is left untouched.
def test_location():
    assert parsed_items[0]['location'] == {
        'url': '',
        'name': '',
        'address': '547 West Jackson Boulevard, Chicago, IL',
        'coordinates': {
from tests.utils import file_response
from documenters_aggregator.spiders.chi_city_college import Chi_cityCollegeSpider

# Saved single-event page, parsed once at import time.
test_response = file_response('files/ccc_event.html')
spider = Chi_cityCollegeSpider()
item = spider.parse_event_page(test_response)


def test_name():
    """The parsed event should carry the expected meeting name."""
    actual = item['name']
    assert actual == 'November 2017 Regular Board Meeting'


def test_start_time():
    """The parsed event's start time should serialize to the expected ISO string."""
    actual = item['start_time'].isoformat()
    assert actual == '2017-11-02T09:00:00-05:00'


def test_end_time():
    """The parsed event should have no end time."""
    actual = item['end_time']
    assert actual is None


def test_id():
    """The parsed event should carry the expected id string."""
    actual = item['id']
    assert actual == 'chi_city_college/201711020900/x/november_2017_regular_board_meeting'


def test_all_day():
    """The parsed event's ``all_day`` flag should be exactly ``False``."""
    flag = item['all_day']
    assert flag is False


def test_classification():
    """The parsed event should be classified as 'Not classified'."""
    actual = item['classification']
    assert actual == 'Not classified'
from datetime import datetime

import pytest
from city_scrapers_core.constants import COMMISSION, PASSED
from tests.utils import file_response

from city_scrapers.spiders.chi_zoning_board import ChiZoningBoardSpider

# Saved zoning board page, parsed once at import time.
test_response = file_response(
    'files/chi_zoning_board.html',
    'https://www.chicago.gov/city/en/depts/dcd/supp_info/zoning_board_of_appeals.html',
)
spider = ChiZoningBoardSpider()
# Everything ``parse`` yields is kept (no type filtering here).
parsed_items = list(spider.parse(test_response))


def test_meeting_count():
    """The fixture should produce 122 parsed meetings."""
    # 12 mtgs / yr * 10 yrs + 2 extra (May 2015, and August 2014)
    meeting_total = len(parsed_items)
    assert meeting_total == 122


def test_unique_id_count():
    """Every parsed meeting should have a distinct ``id`` (122 unique values)."""
    distinct_ids = {meeting['id'] for meeting in parsed_items}
    assert len(distinct_ids) == 122


def test_title():
    """The first parsed item should be titled 'Board of Appeals'."""
    actual = parsed_items[0]['title']
    assert actual == 'Board of Appeals'


def test_description():
    """The first parsed item should have an empty description."""
    actual = parsed_items[0]['description']
    assert actual == ''
from datetime import datetime

import pytest
from city_scrapers_core.constants import COMMISSION, PASSED
from freezegun import freeze_time
from tests.utils import file_response

from city_scrapers.spiders.chi_board_elections import ChiBoardElectionsSpider

# The clock is frozen at 2018-11-30 12:00:01 before the fixture is loaded.
# NOTE(review): no matching ``freezer.stop()`` is visible in this portion of
# the file - confirm the freezer is stopped further down, otherwise the
# frozen clock stays active after this module is imported.
freezer = freeze_time('2018-11-30 12:00:01')
freezer.start()
test_response = file_response(
    'files/chi_board_elections.html',
    url='https://app.chicagoelections.com/pages/en/board-meetings.aspx')
spider = ChiBoardElectionsSpider()
# Everything ``_next_meeting`` yields is kept (no type filtering here).
parsed_items = [item for item in spider._next_meeting(test_response)]


def test_title():
    """The first parsed item should be titled 'Electoral Board'."""
    assert parsed_items[0]['title'] == 'Electoral Board'


def test_description():
    """The first parsed item should have an empty description."""
    assert parsed_items[0]['description'] == ''


def test_start():
    """The first parsed item should start at 2018-11-27 09:30."""
    assert parsed_items[0]['start'] == datetime(2018, 11, 27, 9, 30)


# NOTE(review): the definition below is cut off in this view; left untouched.
def test_end():
import pytest

from tests.utils import file_response
from documenters_aggregator.spiders.chi_library import Chi_librarySpider

# def test_tests():
#    print('Please write some tests for this spider or at least disable this one.')
#    assert False

# Saved board meeting schedule page, parsed once at import time.
test_response = file_response(
    'files/chi_library.html',
    url='https://www.chipublib.org/board-of-directors/board-meeting-schedule/')
spider = Chi_librarySpider()
# Keep only the dict results of ``parse``.
parsed_items = [
    item for item in spider.parse(test_response) if isinstance(item, dict)
]


def test_name():
    """The first parsed item should carry the expected meeting name."""
    assert parsed_items[0]['name'] == 'Chicago Public Library Board Meeting'


def test_description():
    """The first parsed item should carry the expected description text."""
    assert parsed_items[0][
        'description'] == 'There are no meetings in February, July and August. Entry into these meetings is permitted at 8:45 a.m.'


def test_start_time():
    """The first parsed item should start at the expected ISO timestamp."""
    assert parsed_items[0]['start_time'] == '2017-01-17T09:00:00-06:00'


# NOTE(review): the definition below is cut off in this view; left untouched.
def test_end_time():
from datetime import date, time

import pytest
# Adapted from test_chi_parks.py
from freezegun import freeze_time
from tests.utils import file_response

from city_scrapers.spiders.wayne_public_services import WaynePublicServicesSpider

# The clock is frozen at 2018-03-27 12:00:01 while the fixture is loaded and
# parsed, and released again before any test runs.
freezer = freeze_time('2018-03-27 12:00:01')
freezer.start()
test_response = file_response(
    'files/wayne_public_services.html',
    url='https://www.waynecounty.com/elected/commission/public-services.aspx')
spider = WaynePublicServicesSpider()
# Keep only the dict results of ``parse``; the parametrized tests below run
# once per entry in this list.
parsed_items = [
    item for item in spider.parse(test_response) if isinstance(item, dict)
]
freezer.stop()


# PARAMETRIZED TESTS


@pytest.mark.parametrize('item', parsed_items)
def test_event_description(item):
    """Every parsed item should have an empty event description."""
    assert item['event_description'] == ''


# NOTE(review): the definition below is cut off in this view; left untouched.
@pytest.mark.parametrize('item', parsed_items)
def test_location(item):
    expected_location = ({