def test_parser_find_all_divs_with_specific_attrs():
    """Check that find_all with a tag name plus an attribute filter finds 21 matches.

    The expected 21 is the real result rows plus the one row of
    localized results in the SALARIES fixture.
    """
    # NOTE(review): despite "divs" in the test name, the tag searched is 'tr'.
    parsed = ParseHTML(SALARIES)
    wanted_attrs = {'class': 'dataRow first'}
    match_count = sum(1 for _ in parsed.find_all('tr', wanted_attrs))
    assert match_count == 21
def parse_salaries_data(data, tag="tbody", class_type="dataRow"):
    """Lazily yield the ``.tr`` of each matching ``tag`` element in ``data``.

    ``data`` is handed to ``ParseHTML`` and every element returned by
    ``find_all(tag)`` is inspected. The element's ``.tr`` is yielded when
    it has a "class" key and ``.find(class_type)`` on that value does not
    return -1 (a substring match, assuming the value is a string).

    Because this is a generator, nothing is parsed until the first item
    is requested.
    """
    soup = ParseHTML(data)
    for section in soup.find_all(tag):
        first_row = section.tr
        # NOTE(review): if `.tr` can be None for an element without a <tr>
        # child, the has_key call below would raise -- confirm against
        # ParseHTML.
        if first_row.has_key("class") and first_row["class"].find(class_type) != -1:
            yield first_row
def parse_jobs_data(data, tag="div", class_type="jobListing"):
    """Lazily yield each ``tag`` element of ``data`` carrying ``class_type``.

    ``data`` is handed to ``ParseHTML`` and every element returned by
    ``find_all(tag)`` is inspected. An element is yielded when it has a
    "class" key and ``class_type`` is exactly one of the pieces obtained
    by splitting that value on single spaces.

    Because this is a generator, nothing is parsed until the first item
    is requested.
    """
    document = ParseHTML(data)
    for element in document.find_all(tag):
        # Whole-token match: "jobListing" matches "a jobListing b" but not
        # "jobListingExtra".
        if element.has_key("class") and class_type in element["class"].split(" "):
            yield element
def test_parser_find_all_elements():
    """Check that find_all('div') finds 260 elements in the SALARIES fixture."""
    # NOTE(review): another function with this exact name is defined later in
    # this file. If both live in the same module, the later definition replaces
    # this one at import time and this test is never collected -- consider
    # giving the two tests distinct names.
    parsed = ParseHTML(SALARIES)
    div_count = sum(1 for _ in parsed.find_all('div'))
    assert div_count == 260
def test_parser_find_all_elements():
    """Check that find_all(True) finds 1054 elements in the SALARIES fixture."""
    # NOTE(review): a function with this exact name is also defined earlier in
    # this file. If both live in the same module, this definition shadows the
    # earlier one -- consider giving the two tests distinct names.
    parsed = ParseHTML(SALARIES)
    # Presumably True means "match every tag" (the BeautifulSoup convention);
    # confirm against ParseHTML.find_all.
    element_count = sum(1 for _ in parsed.find_all(True))
    assert element_count == 1054