import re

import scrapertools

# BASE_URL, LOCALITY_LIST_URL and CITY_LIST_URL are module-level constants
# expected to be defined elsewhere in the module.


def getEstablishments(city):
    """Page through a city's establishment list and collect basic details."""
    establishmentsFound = []
    start = 1
    count = 1000
    more = True
    while more:
        establishmentList = scrapertools.getContent(
            BASE_URL + city['establishmentUrl'] + '&start=' + str(start) + '&count=' + str(count))
        # Stop paging once the site reports that no more documents match.
        if establishmentList.find(text='No documents found') is not None:
            more = False
            continue
        start += count
        establishments = establishmentList.find_all('tr')
        for establishment in establishments:
            details = establishment.find_all('td')
            # Only rows with four cells and a linked name are establishment entries.
            if len(details) == 4 and details[0] is not None and details[0].a is not None:
                establishmentsFound.append({
                    'name': scrapertools.getText(details[0]),
                    'url': details[0].a['href'],
                    'address': scrapertools.getText(details[2]),
                    'locality': city['locality'],
                    'last_inspection_date': scrapertools.getText(details[3])
                })
    return establishmentsFound

def getEstablishmentDetails(establishment):
    """Fetch a single establishment's page and add city, geo and type fields."""
    establishmentDetails = scrapertools.getContent(BASE_URL + establishment['url'])
    # The city sits after a <br> in the cell next to the "Facility Location" label;
    # strip the <br> markup and any stray line breaks from the raw fragment.
    establishment['city'] = re.sub(
        '(<(/)?br>)|(\r)|(\n)', '',
        str(establishmentDetails.find(text=re.compile("Facility Location")).parent.next_sibling.find('br')))
    geo = scrapertools.getLatLng(establishment['address'], establishment['city'])
    establishment['geo'] = {'type': "Point", 'coordinates': [geo['lat'], geo['lng']]}
    establishment['type'] = establishmentDetails.find(
        text=re.compile("Facility Type")).parent.next_sibling.string
    return establishment

def getCities():
    """Walk the locality list and collect every unique city with its scrape URLs."""
    citiesFound = []
    cityNames = []
    localityList = scrapertools.getContent(BASE_URL + LOCALITY_LIST_URL)
    localities = localityList.body.div.img.find_all_next('a')
    for locality in localities:
        cityList = scrapertools.getContent(BASE_URL + locality['href'] + CITY_LIST_URL)
        cities = cityList.find_all('a')
        for city in cities:
            name = str(city.string).strip()
            # Cities can appear under more than one locality; keep the first occurrence.
            if name not in cityNames:
                print('Adding ' + name)
                cityNames.append(name)
                citiesFound.append({
                    'name': name,
                    'locality': str(locality.string).strip(),
                    'baseUrl': city['href'][:city['href'].find('Food-List-ByName')],
                    'establishmentUrl': city['href'].replace('Count=30', '')
                })
    return citiesFound

def getInspections(establishment, cityUrl):
    """Collect every inspection listed on an establishment's page, with its violations."""
    inspectionsFound = []
    establishmentDetails = scrapertools.getContent(BASE_URL + establishment['url'])
    # Inspection rows follow the row containing the "Inspection Type" header.
    inspections = establishmentDetails.find_all(text='Inspection Type')[0].find_parent('tr').find_all_next('tr')
    for inspection in inspections:
        details = inspection.find_all('td')
        # Skip rows whose first cell does not link to an inspection report.
        if details[0].a is None:
            continue
        violations = getViolations(BASE_URL + cityUrl + '/' + details[0].a['href'])
        inspectionsFound.append({
            'type': scrapertools.getText(details[0]),
            'date': scrapertools.getText(details[1]),
            'violations': violations
        })
    return inspectionsFound

def getViolations(inspectionDetailsUrl):
    """Parse the violations table on an inspection report page."""
    violationsFound = []
    inspectionDetails = scrapertools.getContent(inspectionDetailsUrl)
    violations = inspectionDetails.find(text='Violations:').find_next('table')
    if violations is None:
        return []
    # Skip the header row; every following sibling row is one violation.
    violations = violations.find('tr').find_next_siblings()
    for violation in violations:
        details = violation.find_all('td')
        # Bold tags carry the Repeat/Critical/Corrected flags, <font> tags hold the
        # correction text, and bare text nodes hold the observation narrative.
        violationsFound.append({
            'code': scrapertools.getAllText(details[0])[0],
            'repeat': any('Repeat' in tag.string for tag in details[1].contents if tag.name == 'b'),
            'critical': any('Critical' in tag.string for tag in details[1].contents if tag.name == 'b'),
            'corrected': any('Corrected' in tag.string for tag in details[1].contents if tag.name == 'b'),
            'correction': ' '.join(tag.string for tag in details[1].contents if tag.name == 'font').strip(),
            'observation': ' '.join(tag.string for tag in details[1].contents if tag.name is None).strip()
        })
    return violationsFound
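
# A minimal usage sketch of how the functions above chain together: getCities feeds
# getEstablishments, and each result is enriched by getEstablishmentDetails and
# getInspections. scrapeAll is illustrative only, and passing city['baseUrl'] as the
# cityUrl argument of getInspections is an assumption, not something defined above.
def scrapeAll():
    results = []
    for city in getCities():
        for establishment in getEstablishments(city):
            establishment = getEstablishmentDetails(establishment)
            establishment['inspections'] = getInspections(establishment, city['baseUrl'])
            results.append(establishment)
    return results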